From ad7d3bc84c3bccf2f8f714941ca7375179adfe8f Mon Sep 17 00:00:00 2001 From: Stephen Schoettler Date: Thu, 14 May 2026 19:08:38 -0700 Subject: [PATCH 001/218] test(e2e): fix Discord mock exception surface --- tests/e2e/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 332cccee497..acb999e9e34 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -66,6 +66,9 @@ def _ensure_discord_mock(): discord_mod.DMChannel = type("DMChannel", (), {}) discord_mod.Thread = type("Thread", (), {}) discord_mod.ForumChannel = type("ForumChannel", (), {}) + discord_mod.Forbidden = type("Forbidden", (Exception,), {}) + discord_mod.MessageType = SimpleNamespace(default=0, reply=19) + discord_mod.Object = lambda *, id: SimpleNamespace(id=id) discord_mod.Interaction = object discord_mod.app_commands = SimpleNamespace( describe=lambda **kwargs: (lambda fn: fn), From e8a4c85e889b8990ef4cb5d70276b286d82afac7 Mon Sep 17 00:00:00 2001 From: Stephen Schoettler Date: Thu, 14 May 2026 19:24:12 -0700 Subject: [PATCH 002/218] test(run-agent): isolate Nous provider parity model --- tests/run_agent/test_provider_parity.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index d3a5a1b37fa..c65c22004a9 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -61,6 +61,8 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht ) if model: kwargs["model"] = model + elif provider == "nous": + kwargs["model"] = "gpt-5" base_url="https://openrouter.ai/api/v1", api_key="test-key", base_url="https://openrouter.ai/api/v1", From f9ad7400e30517159712a77e6a4bc2f3a390b2db Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Thu, 14 May 2026 23:43:13 -0700 Subject: [PATCH 003/218] =?UTF-8?q?fix(goals):=20raise=20judge=20max=5Ftok?= 
=?UTF-8?q?ens=20200=20=E2=86=92=204096,=20make=20configurable?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The freeform /goal judge was capped at max_tokens=200, which reliably truncated the JSON verdict on reasoning-heavy models (deepseek-v4-pro, qwq, etc.) — the model burns tokens on hidden reasoning before emitting visible content, and the first /goal turn's prompt is larger than later turns, blowing past 200. Symptom: agent.log shows `judge reply was not JSON: '{"done": true, "reason": "The agent successfully'` followed by repeated `judge returned empty response` lines, then the goal pauses with a misleading 'judge model isn't returning the required JSON verdict' message. Diagnosed live by @helix4u — empirically verified that raising the budget on an unmodified worktree makes the failures go away on the exact configs users were hitting on Nous Plus subscription paths. Changes: - DEFAULT_JUDGE_MAX_TOKENS = 4096 (up from 200) - New auxiliary.goal_judge.max_tokens config knob for tuning in specifically constrained setups - _goal_judge_max_tokens() resolves the value with fail-open semantics (non-int / non-positive / load failure → default). load_config() is mtime-cached so per-turn lookup is cheap. Scoped narrowly to the verified root cause — does not introduce a submit_verdict tool-call schema (see #26162 / #23671 for that direction; they can land separately if we want them). Tests: tests/hermes_cli/test_goals.py + tests/cli/test_cli_goal_interrupt.py + tests/gateway/test_goal_verdict_send.py — 62/62 passing. E2E verified: config override honored (8192), missing/garbage/zero values fall back to 4096, no-auxiliary-section falls back to 4096. Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com> Credits: - @helix4u (Gille) — diagnosed the max_tokens=200 truncation via live testing on an unmodified worktree, drafted the original fix shape in #26162. 
- @AhmetArif0 — flagged the freeform judge fragility in #23671 from the tool-call angle. - @0xharryriddle (HarryRiddle.eth) — reported the issue from a Nous Plus subscription setup in #23876 with full debug reports. Closes #23876 Supersedes #26162, #23671, #23881 --- hermes_cli/goals.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py index 1542b9a7a38..62ee00547c1 100644 --- a/hermes_cli/goals.py +++ b/hermes_cli/goals.py @@ -45,6 +45,16 @@ logger = logging.getLogger(__name__) DEFAULT_MAX_TURNS = 20 DEFAULT_JUDGE_TIMEOUT = 30.0 +# Judge output budget. The freeform judge returns a one-line JSON verdict, but +# reasoning models (deepseek-v4, qwq, etc.) burn tokens on hidden reasoning +# before emitting the visible JSON — and the first /goal turn's prompt is +# larger than later turns, which pushes total reply length past tight caps. +# 200 tokens (the original default) reliably truncated the JSON on reasoning +# models, leaving '{"done": true, "reason": "The agent successfully' and +# triggering the auto-pause. 4096 covers reasoning + verdict on every model +# we've live-tested; override via auxiliary.goal_judge.max_tokens for +# specifically constrained setups. +DEFAULT_JUDGE_MAX_TOKENS = 4096 # Cap how much of the last response + recent messages we send to the judge. _JUDGE_RESPONSE_SNIPPET_CHARS = 4000 # After this many consecutive judge *parse* failures (empty output / non-JSON), @@ -282,6 +292,30 @@ def _truncate(text: str, limit: int) -> str: _JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL) +def _goal_judge_max_tokens() -> int: + """Resolve auxiliary.goal_judge.max_tokens, falling back to the default. + + ``load_config()`` is cached on the config file's (mtime, size), so calling + this once per judge turn is cheap. A non-positive or non-int value falls + back to the default rather than crashing the goal loop. 
+ """ + try: + from hermes_cli.config import load_config + + cfg = load_config() + value = ( + (cfg.get("auxiliary") or {}) + .get("goal_judge", {}) + .get("max_tokens", DEFAULT_JUDGE_MAX_TOKENS) + ) + value = int(value) + if value > 0: + return value + except Exception: + pass + return DEFAULT_JUDGE_MAX_TOKENS + + def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]: """Parse the judge's reply. Fail-open to ``(False, "", parse_failed)``. @@ -404,7 +438,7 @@ def judge_goal( {"role": "user", "content": prompt}, ], temperature=0, - max_tokens=200, + max_tokens=_goal_judge_max_tokens(), timeout=timeout, extra_body=get_auxiliary_extra_body() or None, ) From 6bdad1f3b2e31d38673146da362ca5dd4ddbb456 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Fri, 15 May 2026 13:21:48 +0530 Subject: [PATCH 004/218] ci: add PyPI publish workflow (salvaged from #25901) (#26148) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci(pypi): add publish workflow for automated PyPI releases Triggered by CalVer tag pushes from scripts/release.py (v20* pattern). Three jobs: build (uv build) → publish (OIDC trusted publishing) → sign (Sigstore + attach to existing GitHub Release). - workflow_dispatch as manual escape hatch - skip-existing for safe re-runs - Graceful skip when GitHub Release not found (sign job) - Top-level permissions: contents: read (CodeQL compliant) Requires one-time setup: PyPI trusted publisher + GitHub pypi environment. 
Co-authored-by: dmahan93 <44207705+dmahan93@users.noreply.github.com> * fix(release): address review findings - Stage acp_registry/agent.json in version bump commit (was silently left unstaged) - Add missing return when no previous tags found without --first-release - Fix get_pr_number return type annotation (str -> str | None) - Prefer uv build over python -m build (matches CI workflow), with fallback - Use unit separator (%x1f) in git log format to handle | in author names - Add explicit encoding='utf-8' to .release_notes.md write Workflow hardening: - Gracefully skip signing when GitHub Release not found (env var gate instead of exit 1, so PyPI publish still shows green) * fix(ci): harden PyPI workflow — SHA-pin actions, guard workflow_dispatch, explicit build flags - Pin all actions to commit SHAs (supply-chain hardening for id-token:write) - workflow_dispatch now requires confirm_tag input + checks out that tag - Both uv build paths explicitly pass --sdist --wheel --------- Co-authored-by: dmahan93 <44207705+dmahan93@users.noreply.github.com> --- .github/workflows/upload_to_pypi.yml | 137 +++++++++++++++++++++++++++ scripts/release.py | 36 ++++--- 2 files changed, 160 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/upload_to_pypi.yml diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml new file mode 100644 index 00000000000..4e2fe4748d3 --- /dev/null +++ b/.github/workflows/upload_to_pypi.yml @@ -0,0 +1,137 @@ +name: Publish to PyPI + +# Triggered by CalVer tag pushes from scripts/release.py (e.g. v2026.5.15) +# Can also be triggered manually from the Actions tab as an escape hatch. +on: + push: + tags: + - 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc. + workflow_dispatch: + inputs: + confirm_tag: + description: 'Tag to publish (e.g. v2026.5.15). Must already exist.' + required: true + type: string + +# Restrict default token to read-only; each job escalates as needed. 
+permissions: + contents: read + +# Prevent overlapping publishes (e.g. two same-day tags pushed quickly). +concurrency: + group: pypi-publish + cancel-in-progress: false + +jobs: + build: + name: Build distribution 📦 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false + # On workflow_dispatch, check out the confirmed tag. + ref: ${{ inputs.confirm_tag || github.ref }} + fetch-tags: true + + - name: Validate tag exists + if: github.event_name == 'workflow_dispatch' + run: | + if ! git tag -l "${{ inputs.confirm_tag }}" | grep -q .; then + echo "::error::Tag '${{ inputs.confirm_tag }}' does not exist in the repo" + exit 1 + fi + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: '3.13' + + - name: Install uv + uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6 + + - name: Build wheel and sdist + run: uv build --sdist --wheel + + - name: Upload distribution artifacts + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: python-package-distributions + path: dist/ + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/hermes-agent + permissions: + id-token: write # OIDC trusted publishing + + steps: + - name: Download distribution artifacts + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 + with: + skip-existing: true + + sign: + name: Sign and attach to GitHub Release + # Only runs on tag pushes — release.py creates the GitHub Release, + # and workflow_dispatch won't have a matching release to attach to. 
+ if: startsWith(github.ref, 'refs/tags/') + needs: publish + runs-on: ubuntu-latest + permissions: + contents: write # attach assets to the existing release + id-token: write # sigstore signing + + steps: + - name: Download distribution artifacts + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + name: python-package-distributions + path: dist/ + + - name: Wait for GitHub Release to exist + env: + GITHUB_TOKEN: ${{ github.token }} + # release.py creates the GitHub Release after pushing the tag, + # but this workflow starts from the tag push — wait for it. + run: | + for i in $(seq 1 30); do + if gh release view "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then + echo "Release $GITHUB_REF_NAME found" + exit 0 + fi + echo "Waiting for release... ($i/30)" + sleep 10 + done + echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes — skipping signature upload" + echo "skip_sign=true" >> "$GITHUB_ENV" + + - name: Sign with Sigstore + if: env.skip_sign != 'true' + uses: sigstore/gh-action-sigstore-python@f514d46b907ebcd5bedc05145c03b69c1edd8b46 # v3.0.0 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + + - name: Attach signed artifacts to GitHub Release + if: env.skip_sign != 'true' + env: + GITHUB_TOKEN: ${{ github.token }} + # release.py already created the GitHub Release — just upload + # the Sigstore signatures alongside the existing assets. + run: >- + gh release upload + "$GITHUB_REF_NAME" dist/*.sigstore.json + --repo "$GITHUB_REPOSITORY" + --clobber diff --git a/scripts/release.py b/scripts/release.py index d3118bc128e..53db4bbec2c 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1188,15 +1188,21 @@ def _update_acp_registry_versions(semver: str) -> None: def build_release_artifacts(semver: str) -> list[Path]: """Build sdist/wheel artifacts for the current release. - Returns the artifact paths when the local environment has ``python -m build`` - available. 
If build tooling is missing or the build fails, returns an empty - list and lets the release proceed without attached Python artifacts. + Tries ``uv build`` first (matching the CI workflow), falls back to + ``python -m build`` if uv is unavailable. """ dist_dir = REPO_ROOT / "dist" shutil.rmtree(dist_dir, ignore_errors=True) + # Prefer uv build (matches CI workflow), fall back to python -m build. + uv_bin = shutil.which("uv") + if uv_bin: + cmd = [uv_bin, "build", "--sdist", "--wheel"] + else: + cmd = [sys.executable, "-m", "build", "--sdist", "--wheel"] + result = subprocess.run( - [sys.executable, "-m", "build", "--sdist", "--wheel"], + cmd, cwd=str(REPO_ROOT), capture_output=True, text=True, @@ -1209,7 +1215,7 @@ def build_release_artifacts(semver: str) -> list[Path]: print(f" {stderr.splitlines()[-1]}") elif stdout: print(f" {stdout.splitlines()[-1]}") - print(" Install the 'build' package to attach semver-named sdist/wheel assets.") + print(" Install uv or the 'build' package to attach sdist/wheel assets.") return [] artifacts = sorted(p for p in dist_dir.iterdir() if p.is_file()) @@ -1316,11 +1322,11 @@ def get_commits(since_tag=None): else: range_spec = "HEAD" - # Format: hash|author_name|author_email|subject\0body - # Using %x00 (null) as separator between subject and body + # Format: hashauthor_nameauthor_emailsubject\0body + # Using %x1f (unit separator) to avoid conflict with | in author names log = git( "log", range_spec, - "--format=%H|%an|%ae|%s%x00%b%x00", + "--format=%H%x1f%an%x1f%ae%x1f%s%x00%b%x00", "--no-merges", ) @@ -1334,14 +1340,14 @@ def get_commits(since_tag=None): entry = entry.strip() if not entry: continue - # Split on first null to separate "hash|name|email|subject" from "body" + # Split on first null to separate "hashnameemailsubject" from "body" if "\0" in entry: header, body = entry.split("\0", 1) body = body.strip() else: header = entry body = "" - parts = header.split("|", 3) + parts = header.split("\x1f", 3) if len(parts) != 4: 
continue sha, name, email, subject = parts @@ -1361,7 +1367,7 @@ def get_commits(since_tag=None): return commits -def get_pr_number(subject: str) -> str: +def get_pr_number(subject: str) -> str | None: """Extract PR number from commit subject if present.""" match = re.search(r"#(\d+)", subject) if match: @@ -1512,6 +1518,7 @@ def main(): print("No previous tags found. Use --first-release for the initial release.") print(f"Would create tag: {tag_name}") print(f"Would set version: {new_version}") + return # Get commits commits = get_commits(since_tag=prev_tag) @@ -1556,7 +1563,10 @@ def main(): print(f" ✓ Updated version files to v{new_version} ({calver_date})") # Commit version bump - add_result = git_result("add", str(VERSION_FILE), str(PYPROJECT_FILE)) + add_files = [str(VERSION_FILE), str(PYPROJECT_FILE)] + if ACP_REGISTRY_MANIFEST.exists(): + add_files.append(str(ACP_REGISTRY_MANIFEST)) + add_result = git_result("add", *add_files) if add_result.returncode != 0: print(f" ✗ Failed to stage version files: {add_result.stderr.strip()}") return @@ -1598,7 +1608,7 @@ def main(): # Create GitHub release changelog_file = REPO_ROOT / ".release_notes.md" - changelog_file.write_text(changelog) + changelog_file.write_text(changelog, encoding="utf-8") gh_cmd = [ "gh", "release", "create", tag_name, From d57a4b3eb51e5c445923d33a5c3da9266e62790b Mon Sep 17 00:00:00 2001 From: libo1106 Date: Sun, 10 May 2026 00:17:13 +0800 Subject: [PATCH 005/218] feat(yuanbao): add _parse_resource_id and update _extract_text for ybres anchors --- gateway/platforms/yuanbao.py | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index d79da7856ae..68184b6cd29 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -1645,6 +1645,25 @@ class ExtractContentMiddleware(InboundMiddleware): return None return f"[link: {link} | visit link for full content]" + 
@staticmethod + def _parse_resource_id(url: str) -> str: + """Extract resourceId from Yuanbao resource URL query parameters. + + Args: + url: Resource URL (e.g., https://...?resourceId=abc123) + + Returns: + Resource ID string, or empty string if not found + """ + if not url: + return "" + try: + query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query) + ids = query.get("resourceId") or query.get("resourceid") or [] + return str(ids[0]).strip() if ids else "" + except Exception: + return "" + @classmethod def _extract_text(cls, msg_body: list) -> str: """Extract plain text content from MsgBody. @@ -1668,14 +1687,35 @@ class ExtractContentMiddleware(InboundMiddleware): if text: parts.append(text) elif elem_type == "TIMImageElem": - parts.append("[image]") + # Extract resourceId from image_info_array URL + image_info_array = content.get("image_info_array") + if not isinstance(image_info_array, list): + image_info_array = [] + image_info = None + # Prefer medium image (index 1), fallback to index 0 + if len(image_info_array) > 1 and isinstance(image_info_array[1], dict): + image_info = image_info_array[1] + elif len(image_info_array) > 0 and isinstance(image_info_array[0], dict): + image_info = image_info_array[0] + image_url = str((image_info or {}).get("url") or "").strip() + rid = cls._parse_resource_id(image_url) + parts.append(f"[image|ybres:{rid}]" if rid else "[image]") elif elem_type == "TIMFileElem": filename = content.get("file_name", content.get("fileName", content.get("filename", ""))) - parts.append(f"[file: {filename}]" if filename else "[file]") + file_url = str(content.get("url") or "").strip() + rid = cls._parse_resource_id(file_url) + if rid: + parts.append(f"[file:{filename}|ybres:{rid}]" if filename else f"[file|ybres:{rid}]") + else: + parts.append(f"[file: {filename}]" if filename else "[file]") elif elem_type == "TIMSoundElem": - parts.append("[voice]") + sound_url = str(content.get("url") or "").strip() + rid = 
cls._parse_resource_id(sound_url) + parts.append(f"[voice|ybres:{rid}]" if rid else "[voice]") elif elem_type == "TIMVideoFileElem": - parts.append("[video]") + video_url = str(content.get("url") or "").strip() + rid = cls._parse_resource_id(video_url) + parts.append(f"[video|ybres:{rid}]" if rid else "[video]") elif elem_type == "TIMCustomElem": data_val = content.get("data", "") if data_val: From 80efe664ce5d822b31ca6c76162c6e1f7500796a Mon Sep 17 00:00:00 2001 From: libo1106 Date: Sun, 10 May 2026 00:17:13 +0800 Subject: [PATCH 006/218] feat(yuanbao): add quote_media_refs extraction to QuoteContextMiddleware --- gateway/platforms/yuanbao.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index 68184b6cd29..be296558177 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -925,6 +925,7 @@ class InboundContext: # Populated by QuoteContextMiddleware reply_to_message_id: Optional[str] = None reply_to_text: Optional[str] = None + quote_media_refs: list = dc_field(default_factory=list) # List of (rid, kind, filename) # Populated by MediaResolveMiddleware media_urls: list = dc_field(default_factory=list) @@ -2172,22 +2173,23 @@ class QuoteContextMiddleware(InboundMiddleware): name = "quote-context" @staticmethod - def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str]]: + def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str], list]: """Extract quote context, mapping to MessageEvent.reply_to_*. 
Returns: - (reply_to_message_id, reply_to_text) + (reply_to_message_id, reply_to_text, quote_media_refs) + where quote_media_refs is a list of (rid, kind, filename) tuples """ if not cloud_custom_data: - return None, None + return None, None, [] try: parsed = json.loads(cloud_custom_data) except (json.JSONDecodeError, TypeError): - return None, None + return None, None, [] quote = parsed.get("quote") if isinstance(parsed, dict) else None if not isinstance(quote, dict): - return None, None + return None, None, [] # type=2 corresponds to image reference; desc may be empty, provide a placeholder. quote_type = int(quote.get("type") or 0) @@ -2195,15 +2197,25 @@ class QuoteContextMiddleware(InboundMiddleware): if quote_type == 2 and not desc: desc = "[image]" if not desc: - return None, None + return None, None, [] quote_id = str(quote.get("id") or "").strip() or None sender = str(quote.get("sender_nickname") or quote.get("sender_id") or "").strip() quote_text = f"{sender}: {desc}" if sender else desc - return quote_id, quote_text + + # Extract media references from desc using _YB_RES_REF_RE regex + media_refs: list = [] + for m in _YB_RES_REF_RE.finditer(desc): + head = m.group(1) # "image" | "file:" | "voice" | "video" + rid = m.group(2) + kind, _, filename = head.partition(":") + kind = kind.strip() + media_refs.append((rid, kind, filename.strip())) + + return quote_id, quote_text, media_refs async def handle(self, ctx: InboundContext, next_fn) -> None: - ctx.reply_to_message_id, ctx.reply_to_text = self._extract_quote_context(ctx.cloud_custom_data) + ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data) await next_fn() From 3df26b925cae7761763e43f03978600d175417c5 Mon Sep 17 00:00:00 2001 From: libo1106 Date: Sun, 10 May 2026 00:17:13 +0800 Subject: [PATCH 007/218] feat(yuanbao): prioritize quote media refs over history backfill in DispatchMiddleware --- gateway/platforms/yuanbao.py | 69 
++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index be296558177..5696e2667d1 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -2510,26 +2510,57 @@ class DispatchMiddleware(InboundMiddleware): media_urls = list(ctx.media_urls) media_types = list(ctx.media_types) - # Backfill observed media from recent transcript history - extra_img_urls: List[str] = [] - extra_img_mimes: List[str] = [] - try: - extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media( - adapter, ctx.source, - ) - except Exception as exc: - logger.warning( - "[%s] observed-image hydration raised, continuing anyway: %s", - adapter.name, exc, - ) - if extra_img_urls: - current = set(media_urls) - for u, m in zip(extra_img_urls, extra_img_mimes): - if u in current: + # If user quoted a message (reply_to_message_id is set), resolve only + # quote_media_refs to avoid injecting unrelated history media. + # Otherwise, backfill observed media from recent transcript history. 
+ if ctx.reply_to_message_id is not None: + # User quoted a message — resolve only media from the quote + for rid, kind, filename in ctx.quote_media_refs: + if kind not in ("image", "file"): continue - media_urls.append(u) - media_types.append(m) - current.add(u) + try: + fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid) + except Exception as exc: + logger.warning( + "[%s] quote media resolve failed: rid=%s kind=%s err=%s", + adapter.name, rid, kind, exc, + ) + continue + cached = await MediaResolveMiddleware._download_and_cache( + adapter, + fetch_url=fresh_url, + kind=kind, + file_name=filename or None, + log_tag=f"quote rid={rid}", + ) + if cached is None: + continue + path, mime = cached + # Avoid duplicates + if path not in media_urls: + media_urls.append(path) + media_types.append(mime) + else: + # No quote — backfill observed media from recent transcript history + extra_img_urls: List[str] = [] + extra_img_mimes: List[str] = [] + try: + extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media( + adapter, ctx.source, + ) + except Exception as exc: + logger.warning( + "[%s] observed-image hydration raised, continuing anyway: %s", + adapter.name, exc, + ) + if extra_img_urls: + current = set(media_urls) + for u, m in zip(extra_img_urls, extra_img_mimes): + if u in current: + continue + media_urls.append(u) + media_types.append(m) + current.add(u) # Replace [kind|ybres:xxx] anchors with local cache paths so # the transcript records usable paths for the model. 
From fc2754dbdff860cdeb8fe4ed5fe0464bb6295cbb Mon Sep 17 00:00:00 2001 From: libo1106 Date: Sun, 10 May 2026 01:05:23 +0800 Subject: [PATCH 008/218] fix(yuanbao): resolve quoted file/image via transcript lookup when quote desc lacks ybres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user quotes a file message (type=3) and @bot, the quote's desc field only contains the filename without a ybres:// resource reference. The existing QuoteContextMiddleware only extracted media refs from desc using the ybres regex, which always returned empty for file quotes. Fix: add a transcript lookup fallback in QuoteContextMiddleware.handle() — when quote_media_refs is empty but reply_to_message_id is set, search the session transcript for the quoted message_id and extract ybres anchors from its content. Also fix message_type classification: when quote media resolves non-image files, override message_type to DOCUMENT so gateway/run.py's document injection logic properly prepends the file path and content for the agent. --- gateway/platforms/yuanbao.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index 5696e2667d1..6c6981c0c2b 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -2216,6 +2216,34 @@ class QuoteContextMiddleware(InboundMiddleware): async def handle(self, ctx: InboundContext, next_fn) -> None: ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data) + + # Fallback: if quote has a message_id but no media_refs extracted from desc, + # look up the quoted message in transcript history by message_id to find ybres anchors. 
+ if ctx.reply_to_message_id and not ctx.quote_media_refs: + store = getattr(ctx.adapter, "_session_store", None) + if store: + try: + session_entry = store.get_or_create_session(ctx.source) + history = store.load_transcript(session_entry.session_id) + for msg in (history or []): + mid = msg.get("message_id", "") + if mid and mid == ctx.reply_to_message_id: + content = msg.get("content", "") + if isinstance(content, str) and "|ybres:" in content: + for m in _YB_RES_REF_RE.finditer(content): + head = m.group(1) + rid = m.group(2) + kind, _, filename = head.partition(":") + kind = kind.strip() + if kind in ("image", "file"): + ctx.quote_media_refs.append((rid, kind, filename.strip())) + break + except Exception as exc: + logger.warning( + "[%s] QuoteContext transcript lookup failed: %s", + ctx.adapter.name, exc, + ) + await next_fn() @@ -2589,7 +2617,11 @@ class DispatchMiddleware(InboundMiddleware): event = MessageEvent( text=_patched_event_text, - message_type=ctx.msg_type, + message_type=( + MessageType.DOCUMENT + if any(not mt.startswith("image/") for mt in media_types) + else ctx.msg_type + ), source=ctx.source, message_id=ctx.msg_id or None, raw_message=ctx.push, From 0086cdaf93b2a85abe787fc9b130e45c0b8b8388 Mon Sep 17 00:00:00 2001 From: libo1106 Date: Sun, 10 May 2026 01:47:36 +0800 Subject: [PATCH 009/218] =?UTF-8?q?refactor(yuanbao):=20improve=20quote=20?= =?UTF-8?q?media=20fallback=20=E2=80=94=20move=20to=20DispatchMiddleware,?= =?UTF-8?q?=20tighten=20conditions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gateway/platforms/yuanbao.py | 63 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index 6c6981c0c2b..7015e0c848c 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -147,6 +147,9 @@ _YB_RES_REF_RE = re.compile( 
r"\[(image|voice|video|file(?::[^|\]]*)?)\|ybres:([A-Za-z0-9_\-]+)\]" ) +# Media kinds that can be resolved and injected into the model context +_RESOLVABLE_MEDIA_KINDS = frozenset({"image", "file"}) + # Strip page indicators like (1/3) appended by BasePlatformAdapter _INDICATOR_RE = re.compile(r'\s*\(\d+/\d+\)$') @@ -2217,33 +2220,6 @@ class QuoteContextMiddleware(InboundMiddleware): async def handle(self, ctx: InboundContext, next_fn) -> None: ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data) - # Fallback: if quote has a message_id but no media_refs extracted from desc, - # look up the quoted message in transcript history by message_id to find ybres anchors. - if ctx.reply_to_message_id and not ctx.quote_media_refs: - store = getattr(ctx.adapter, "_session_store", None) - if store: - try: - session_entry = store.get_or_create_session(ctx.source) - history = store.load_transcript(session_entry.session_id) - for msg in (history or []): - mid = msg.get("message_id", "") - if mid and mid == ctx.reply_to_message_id: - content = msg.get("content", "") - if isinstance(content, str) and "|ybres:" in content: - for m in _YB_RES_REF_RE.finditer(content): - head = m.group(1) - rid = m.group(2) - kind, _, filename = head.partition(":") - kind = kind.strip() - if kind in ("image", "file"): - ctx.quote_media_refs.append((rid, kind, filename.strip())) - break - except Exception as exc: - logger.warning( - "[%s] QuoteContext transcript lookup failed: %s", - ctx.adapter.name, exc, - ) - await next_fn() @@ -2412,7 +2388,7 @@ class MediaResolveMiddleware(InboundMiddleware): for ref in media_refs: kind = str(ref.get("kind") or "").strip().lower() url = str(ref.get("url") or "").strip() - if kind not in {"image", "file"} or not url: + if kind not in _RESOLVABLE_MEDIA_KINDS or not url: continue try: @@ -2471,7 +2447,7 @@ class MediaResolveMiddleware(InboundMiddleware): rid = m.group(2) kind, _, filename = 
head.partition(":") kind = kind.strip() - if kind not in {"image", "file"}: + if kind not in _RESOLVABLE_MEDIA_KINDS: continue if rid in seen: continue @@ -2542,9 +2518,34 @@ class DispatchMiddleware(InboundMiddleware): # quote_media_refs to avoid injecting unrelated history media. # Otherwise, backfill observed media from recent transcript history. if ctx.reply_to_message_id is not None: + # Fallback: if desc didn't contain ybres refs, look up transcript + if not ctx.quote_media_refs: + try: + store = getattr(adapter, "_session_store", None) + if store: + session_entry = store.get_or_create_session(ctx.source) + history = store.load_transcript(session_entry.session_id) + for msg in reversed(history or []): + mid = msg.get("message_id", "") + if mid and mid == ctx.reply_to_message_id: + _content = msg.get("content", "") + if isinstance(_content, str) and "|ybres:" in _content: + for m in _YB_RES_REF_RE.finditer(_content): + head = m.group(1) + rid = m.group(2) + kind, _, filename = head.partition(":") + kind = kind.strip() + if kind in _RESOLVABLE_MEDIA_KINDS: + ctx.quote_media_refs.append((rid, kind, filename.strip())) + break + except Exception as exc: + logger.warning( + "[%s] quote transcript lookup failed: %s", + adapter.name, exc, + ) # User quoted a message — resolve only media from the quote for rid, kind, filename in ctx.quote_media_refs: - if kind not in ("image", "file"): + if kind not in _RESOLVABLE_MEDIA_KINDS: continue try: fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid) @@ -2619,7 +2620,7 @@ class DispatchMiddleware(InboundMiddleware): text=_patched_event_text, message_type=( MessageType.DOCUMENT - if any(not mt.startswith("image/") for mt in media_types) + if any(mt.startswith(("application/", "text/")) for mt in media_types) else ctx.msg_type ), source=ctx.source, From e0e4856d466491ee8a31378c606e65ddfe061ab9 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 15 
May 2026 01:20:24 -0700 Subject: [PATCH 010/218] feat(skills-hub): add huggingface/skills as trusted default tap (#2549) Adds Hugging Face's official skill catalog to the default GitHub taps and classifies it as a trusted source alongside openai/skills and anthropics/skills. - tools/skills_guard.py: huggingface/skills -> TRUSTED_REPOS - tools/skills_hub.py: GitHubSource.DEFAULT_TAPS += huggingface/skills (skills/) - website/docs: list it under default taps + trusted-source examples Closes #2549. Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com> --- tools/skills_guard.py | 2 +- tools/skills_hub.py | 1 + website/docs/developer-guide/creating-skills.md | 2 +- website/docs/user-guide/features/skills.md | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/skills_guard.py b/tools/skills_guard.py index 363e983da1a..1610c3225cb 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -36,7 +36,7 @@ from typing import List, Tuple # Hardcoded trust configuration # --------------------------------------------------------------------------- -TRUSTED_REPOS = {"openai/skills", "anthropics/skills"} +TRUSTED_REPOS = {"openai/skills", "anthropics/skills", "huggingface/skills"} INSTALL_POLICY = { # safe caution dangerous diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 3e2c27c338a..35cec56e08e 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -329,6 +329,7 @@ class GitHubSource(SkillSource): DEFAULT_TAPS = [ {"repo": "openai/skills", "path": "skills/"}, {"repo": "anthropics/skills", "path": "skills/"}, + {"repo": "huggingface/skills", "path": "skills/"}, {"repo": "VoltAgent/awesome-agent-skills", "path": "skills/"}, {"repo": "garrytan/gstack", "path": ""}, {"repo": "MiniMax-AI/cli", "path": "skill/"}, diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md index 43f088a9a35..7496c661d48 100644 --- a/website/docs/developer-guide/creating-skills.md +++ 
b/website/docs/developer-guide/creating-skills.md @@ -360,7 +360,7 @@ All hub-installed skills go through a security scanner that checks for: Trust levels: - `builtin` — ships with Hermes (always trusted) - `official` — from `optional-skills/` in the repo (builtin trust, no third-party warning) -- `trusted` — from openai/skills, anthropics/skills +- `trusted` — from openai/skills, anthropics/skills, huggingface/skills - `community` — non-dangerous findings can be overridden with `--force`; `dangerous` verdicts remain blocked Hermes can now consume third-party skills from multiple external discovery models: diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index 9499e15d806..9959bcce112 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -351,6 +351,7 @@ Hermes can install directly from GitHub repositories and GitHub-based taps. This Default taps (browsable without any setup): - [openai/skills](https://github.com/openai/skills) - [anthropics/skills](https://github.com/anthropics/skills) +- [huggingface/skills](https://github.com/huggingface/skills) - [VoltAgent/awesome-agent-skills](https://github.com/VoltAgent/awesome-agent-skills) - [garrytan/gstack](https://github.com/garrytan/gstack) @@ -445,7 +446,7 @@ Important behavior: |-------|--------|--------| | `builtin` | Ships with Hermes | Always trusted | | `official` | `optional-skills/` in the repo | Builtin trust, no third-party warning | -| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills` | More permissive policy than community sources | +| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills`, `huggingface/skills` | More permissive policy than community sources | | `community` | Everything else (`skills.sh`, well-known endpoints, custom GitHub repos, most marketplaces) | Non-dangerous findings can be overridden with `--force`; `dangerous` verdicts 
stay blocked | ### Update lifecycle From e0e7397c32fa06e4c93ce07bc276ea5c1dca7a84 Mon Sep 17 00:00:00 2001 From: teyrebaz33 Date: Sun, 22 Mar 2026 23:54:02 +0300 Subject: [PATCH 011/218] fix(session): persist auto-reset state across gateway restarts was_auto_reset, auto_reset_reason, and reset_had_activity were not included in SessionEntry.to_dict() / from_dict(), so a gateway restart between session expiry and the user's next message would silently drop the auto-reset notification and context note. Add the three fields to the serialization roundtrip with safe defaults (False / None / False) so existing sessions.json files load cleanly. Add three roundtrip tests to test_session_reset_notify.py. --- gateway/session.py | 6 ++ tests/gateway/test_session_reset_notify.py | 75 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/gateway/session.py b/gateway/session.py index ac6f95eec63..dfa2ca9651d 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -518,6 +518,9 @@ class SessionEntry: else None ), "is_fresh_reset": self.is_fresh_reset, + "was_auto_reset": self.was_auto_reset, + "auto_reset_reason": self.auto_reset_reason, + "reset_had_activity": self.reset_had_activity, } if self.origin: result["origin"] = self.origin.to_dict() @@ -567,6 +570,9 @@ class SessionEntry: resume_reason=data.get("resume_reason"), last_resume_marked_at=last_resume_marked_at, is_fresh_reset=data.get("is_fresh_reset", False), + was_auto_reset=data.get("was_auto_reset", False), + auto_reset_reason=data.get("auto_reset_reason"), + reset_had_activity=data.get("reset_had_activity", False), ) diff --git a/tests/gateway/test_session_reset_notify.py b/tests/gateway/test_session_reset_notify.py index 87903921fbd..a4e9d71d0f8 100644 --- a/tests/gateway/test_session_reset_notify.py +++ b/tests/gateway/test_session_reset_notify.py @@ -205,3 +205,78 @@ class TestResetPolicyNotify: assert restored.notify == original.notify assert restored.notify_exclude_platforms == 
original.notify_exclude_platforms assert restored.mode == original.mode + + +# --------------------------------------------------------------------------- +# SessionEntry to_dict / from_dict roundtrip for auto-reset fields +# --------------------------------------------------------------------------- + +class TestSessionEntryAutoResetRoundtrip: + def test_was_auto_reset_persists_across_roundtrip(self, tmp_path): + """was_auto_reset=True survives to_dict() → from_dict() (gateway restart).""" + store = _make_store( + SessionResetPolicy(mode="idle", idle_minutes=1), + tmp_path, + ) + source = _make_source() + + entry = store.get_or_create_session(source) + entry.updated_at = datetime.now() - timedelta(minutes=5) + store._save() + + entry2 = store.get_or_create_session(source) + assert entry2.was_auto_reset is True + assert entry2.auto_reset_reason == "idle" + assert entry2.session_id != entry.session_id + + # Simulate gateway restart: reload from disk + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry2.session_key) + assert reloaded is not None + assert reloaded.was_auto_reset is True + assert reloaded.auto_reset_reason == "idle" + + def test_reset_had_activity_persists_across_roundtrip(self, tmp_path): + """reset_had_activity survives to_dict() → from_dict() (gateway restart).""" + store = _make_store( + SessionResetPolicy(mode="idle", idle_minutes=1), + tmp_path, + ) + source = _make_source() + + entry = store.get_or_create_session(source) + entry.total_tokens = 1000 + entry.updated_at = datetime.now() - timedelta(minutes=5) + store._save() + + entry2 = store.get_or_create_session(source) + assert entry2.reset_had_activity is True + + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry2.session_key) + assert reloaded is not None + assert reloaded.reset_had_activity is True + + def test_auto_reset_reason_none_roundtrip(self, tmp_path): + 
"""auto_reset_reason=None (no reset) survives roundtrip cleanly.""" + store = _make_store(tmp_path=tmp_path) + source = _make_source() + + entry = store.get_or_create_session(source) + assert entry.was_auto_reset is False + + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry.session_key) + assert reloaded is not None + assert reloaded.was_auto_reset is False + assert reloaded.auto_reset_reason is None + assert reloaded.reset_had_activity is False From 23ac522d3711ea0735f11f4d8f6131ac24554dd3 Mon Sep 17 00:00:00 2001 From: KiraKatana Date: Fri, 15 May 2026 01:24:44 -0700 Subject: [PATCH 012/218] fix(gateway): isinstance-guard string-form 429 error body When a non-Anthropic provider (e.g. Morpheus proxy) returns a 429 with `{"error": "Too Many Requests"}` instead of the expected `{"error": {"type": ...}}` dict, _err_body.json().get("error", {}) returns the raw string and the next .get("type") line crashes with AttributeError, taking down the message handler. Guard with isinstance(_err_json, dict) so non-dict error bodies fall through to the generic rate-limit hint. Salvaged from PR #2587 by @KiraKatana. The PR's fallback-config `base_url`/`api_key_env` fix was already implemented independently on main (run_agent.py:8759-8780) with additional aliases and Ollama Cloud host handling, so only the gateway guard is cherry-picked. 
Co-authored-by: KiraKatana --- gateway/run.py | 2 ++ scripts/release.py | 1 + 2 files changed, 3 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index d986917ebab..5e8fce8e18d 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7991,6 +7991,8 @@ class GatewayRunner: try: if _err_body is not None: _err_json = _err_body.json().get("error", {}) + if not isinstance(_err_json, dict): + _err_json = {} except Exception: pass if _err_json.get("type") == "usage_limit_reached": diff --git a/scripts/release.py b/scripts/release.py index 53db4bbec2c..47cb78edff8 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -637,6 +637,7 @@ AUTHOR_MAP = { "skmishra1991@gmail.com": "bugkill3r", "karamusti912@gmail.com": "MustafaKara7", "kira@ariaki.me": "kira-ariaki", + "kira.ops@proton.me": "KiraKatana", "knopki@duck.com": "knopki", "limars874@gmail.com": "limars874", "lisicheng168@gmail.com": "lesterli", From 814c60092b08df3e4f7ccfcc0bab4e1fbaa39414 Mon Sep 17 00:00:00 2001 From: CoinTheHat <63822243+CoinTheHat@users.noreply.github.com> Date: Mon, 23 Mar 2026 14:23:32 +0300 Subject: [PATCH 013/218] fix: clean stale conversation mappings on response eviction/deletion ResponseStore.put() and .delete() now remove conversations rows that reference evicted or deleted response IDs, preventing 404 errors when a conversation name is reused after its backing response was purged. Adds regression tests for delete, eviction, and handler-level reuse. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/api_server.py | 29 ++++++++++++--- tests/gateway/test_api_server.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 8b53db3a99f..809d6cd8a03 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -356,15 +356,34 @@ class ResponseStore: # Evict oldest entries beyond max_size count = self._conn.execute("SELECT COUNT(*) FROM responses").fetchone()[0] if count > self._max_size: - self._conn.execute( - "DELETE FROM responses WHERE response_id IN " - "(SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?)", - (count - self._max_size,), - ) + # Collect IDs that will be evicted + evict_ids = [ + row[0] + for row in self._conn.execute( + "SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?", + (count - self._max_size,), + ).fetchall() + ] + if evict_ids: + placeholders = ",".join("?" for _ in evict_ids) + # Clear conversation mappings pointing to evicted responses + self._conn.execute( + f"DELETE FROM conversations WHERE response_id IN ({placeholders})", + evict_ids, + ) + # Delete evicted responses + self._conn.execute( + f"DELETE FROM responses WHERE response_id IN ({placeholders})", + evict_ids, + ) self._conn.commit() def delete(self, response_id: str) -> bool: """Remove a response from the store. 
Returns True if found and deleted.""" + # Clear conversation mappings pointing to this response + self._conn.execute( + "DELETE FROM conversations WHERE response_id = ?", (response_id,) + ) cursor = self._conn.execute( "DELETE FROM responses WHERE response_id = ?", (response_id,) ) diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 66b304fff51..032af7109a5 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -105,6 +105,29 @@ class TestResponseStore: store = ResponseStore(max_size=10) assert store.delete("resp_missing") is False + def test_delete_clears_conversation_mapping(self): + """Deleting a response also removes conversation mappings that reference it.""" + store = ResponseStore(max_size=10) + store.put("resp_1", {"output": "hello"}) + store.set_conversation("chat-a", "resp_1") + assert store.get_conversation("chat-a") == "resp_1" + store.delete("resp_1") + assert store.get_conversation("chat-a") is None + + def test_eviction_clears_conversation_mapping(self): + """LRU eviction also removes conversation mappings for evicted responses.""" + store = ResponseStore(max_size=2) + store.put("resp_1", {"output": "one"}) + store.set_conversation("chat-a", "resp_1") + store.put("resp_2", {"output": "two"}) + store.set_conversation("chat-b", "resp_2") + # Adding a 3rd should evict resp_1 and its conversation mapping + store.put("resp_3", {"output": "three"}) + assert store.get("resp_1") is None + assert store.get_conversation("chat-a") is None + # resp_2 mapping should still be intact + assert store.get_conversation("chat-b") == "resp_2" + # --------------------------------------------------------------------------- # _IdempotencyCache @@ -2870,6 +2893,45 @@ class TestConversationParameter: # Conversation mapping should NOT be set since store=false assert adapter._response_store.get_conversation("ephemeral-chat") is None + @pytest.mark.asyncio + async def 
test_conversation_reuse_after_eviction_no_404(self, adapter): + """After eviction clears a conversation mapping, reusing that name starts fresh (no 404).""" + adapter._response_store = ResponseStore(max_size=1) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + {"final_response": "First", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + # Create conversation -> resp stored + resp1 = await cli.post("/v1/responses", json={ + "input": "hello", + "conversation": "my-chat", + }) + assert resp1.status == 200 + + # Evict by adding another response + mock_run.return_value = ( + {"final_response": "Other", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + await cli.post("/v1/responses", json={"input": "other"}) + + # Conversation mapping should have been cleaned by eviction + assert adapter._response_store.get_conversation("my-chat") is None + + # Reuse conversation name — should start fresh, not 404 + mock_run.return_value = ( + {"final_response": "Restarted", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + resp3 = await cli.post("/v1/responses", json={ + "input": "hello again", + "conversation": "my-chat", + }) + assert resp3.status == 200 + # --------------------------------------------------------------------------- # X-Hermes-Session-Id header (session continuity) From 0161d4bb6ce3154e2cdd8ce54d43273cf457840f Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:27:31 -0700 Subject: [PATCH 014/218] chore(release): add AUTHOR_MAP entry for CoinTheHat --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 47cb78edff8..10d67f3e708 100755 --- 
a/scripts/release.py +++ b/scripts/release.py @@ -74,6 +74,7 @@ AUTHOR_MAP = { "1095245867@qq.com": "littlewwwhite", "db@project-aeon.com": "db-aeon", "ahmed@abadr.net": "ahmedbadr3", + "63822243+CoinTheHat@users.noreply.github.com": "CoinTheHat", "cleo@edaphic.xyz": "curiouscleo", "hirokazu.ogawa@kwansei.ac.jp": "hrkzogw", "datapod.k@gmail.com": "dandacompany", From 681778a0b753bac894bd30b1d257bcb3eface63d Mon Sep 17 00:00:00 2001 From: Wysie Date: Fri, 15 May 2026 01:29:43 -0700 Subject: [PATCH 015/218] fix(whatsapp): fail fast when Baileys sendMessage hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Baileys' sock.sendMessage() can hang indefinitely while uploading media to WhatsApp servers (and, less often, on text sends), pinning the bridge's Express handler until the gateway's aiohttp timeout fires — surfacing to the user as a 120s wait followed by an empty error from the TTS/voice path. Wrap every sock.sendMessage() call inside the bridge in a sendWithTimeout() helper that rejects after WHATSAPP_SEND_TIMEOUT_MS (default 60s) via Promise.race. The four call sites are /send, /edit, and /send-media's primary send. Express handlers catch the rejection in their existing try/catch and return a real 500 to the gateway, which can then surface a retryable error. Salvaged from #2608 — wysie diagnosed the hang and the Promise.race shape; the other two parts of that PR (gateway HTTP session pooling, base.py metadata kwarg removal) already landed on main via separate routes and are no longer needed. 
Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- scripts/whatsapp-bridge/bridge.js | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 9ff64471e56..5723d8b543b 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -57,11 +57,28 @@ const REPLY_PREFIX = process.env.WHATSAPP_REPLY_PREFIX === undefined : process.env.WHATSAPP_REPLY_PREFIX.replace(/\\n/g, '\n'); const MAX_MESSAGE_LENGTH = parseInt(process.env.WHATSAPP_MAX_MESSAGE_LENGTH || '4096', 10); const CHUNK_DELAY_MS = parseInt(process.env.WHATSAPP_CHUNK_DELAY_MS || '300', 10); +// Per-call timeout for sock.sendMessage(). Baileys occasionally hangs forever +// when uploading media to WhatsApp servers (and, less often, on text sends), +// which pins the bridge's HTTP handler until the upstream aiohttp timeout +// fires. Fail fast instead so the gateway can surface a real error and retry. +const SEND_TIMEOUT_MS = parseInt(process.env.WHATSAPP_SEND_TIMEOUT_MS || '60000', 10); function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } +function sendWithTimeout(chatId, payload, timeoutMs = SEND_TIMEOUT_MS) { + let timer; + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`sendMessage timed out after ${timeoutMs / 1000}s`)), + timeoutMs, + ); + }); + return Promise.race([sock.sendMessage(chatId, payload), timeoutPromise]) + .finally(() => clearTimeout(timer)); +} + function formatOutgoingMessage(message) { // In bot mode, messages come from a different number so the prefix is // redundant — the sender identity is already clear. 
Only prepend in @@ -487,7 +504,7 @@ app.post('/send', async (req, res) => { const chunks = splitLongMessage(formatOutgoingMessage(message)); const messageIds = []; for (let i = 0; i < chunks.length; i += 1) { - const sent = await sock.sendMessage(chatId, { text: chunks[i] }); + const sent = await sendWithTimeout(chatId, { text: chunks[i] }); trackSentMessageId(sent); if (sent?.key?.id) messageIds.push(sent.key.id); if (chunks.length > 1 && i < chunks.length - 1) { @@ -521,10 +538,10 @@ app.post('/edit', async (req, res) => { const chunks = splitLongMessage(formatOutgoingMessage(message)); const messageIds = []; - await sock.sendMessage(chatId, { text: chunks[0], edit: key }); + await sendWithTimeout(chatId, { text: chunks[0], edit: key }); if (chunks.length > 1) { for (let i = 1; i < chunks.length; i += 1) { - const sent = await sock.sendMessage(chatId, { text: chunks[i] }); + const sent = await sendWithTimeout(chatId, { text: chunks[i] }); trackSentMessageId(sent); if (sent?.key?.id) messageIds.push(sent.key.id); if (i < chunks.length - 1) { @@ -625,7 +642,7 @@ app.post('/send-media', async (req, res) => { break; } - const sent = await sock.sendMessage(chatId, msgPayload); + const sent = await sendWithTimeout(chatId, msgPayload); trackSentMessageId(sent); From 04b1fdaecfda15ff4c8f5c9f0041516efd01ba30 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Fri, 15 May 2026 14:03:08 +0530 Subject: [PATCH 016/218] security(deps): add upper bounds to 5 loose deps + document supply chain policy (#24226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the Mini Shai-Hulud supply chain campaign (May 2026) and the litellm compromise (March 2026), codify the dependency pinning policy that was established in PRs #2810 and #9801 but never written down for contributors. 
Changes: - pyproject.toml: Add tight upper bounds to the 5 deps that slipped through as review escapes from external contributor PRs: - hindsight-client>=0.4.22,<0.5 (was >=0.4.22) - aiosqlite>=0.20,<0.23 (was >=0.20) - asyncpg>=0.29,<0.32 (was >=0.29) - alibabacloud-dingtalk>=2.0.0,<3 (was >=2.0.0) - youtube-transcript-api>=1.2.0,<2 (was >=1.2.0) Pre-1.0 packages get <0.(current_minor+2) — tight enough to block hostile minor releases but loose enough to not require bumps every week. - CONTRIBUTING.md: Add 'Dependency pinning policy' section under Security with the full rationale, table of source types + treatments, and examples. - AGENTS.md: Add concise 'Dependency Pinning Policy' section for AI coding agents with the decision table and step-by-step checklist. - supply-chain-audit.yml: Add dep-bounds job that fails PRs introducing PyPI deps without > "$GITHUB_OUTPUT" + exit 0 + fi + + # Match PyPI dep specs that have >= but no < ceiling. + # Pattern: "package>=version" without a following ",<" bound. + # Excludes git+ URLs (which use commit SHAs) and comments. + UNBOUNDED=$(echo "$ADDED" | grep -oE '"[a-zA-Z0-9_-]+(\[[^\]]*\])?>=[ 0-9.]+"' | grep -v ',<' || true) + + if [ -n "$UNBOUNDED" ]; then + echo "found=true" >> "$GITHUB_OUTPUT" + echo "$UNBOUNDED" > /tmp/unbounded.txt + else + echo "found=false" >> "$GITHUB_OUTPUT" + fi + + - name: Post unbounded dep warning + if: steps.bounds.outputs.found == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + BODY="## ⚠️ Unbounded PyPI Dependency Detected + + This PR adds PyPI dependencies without a \`=floor,=1.2.0,<2\"\` + + --- + *See PR #2810 and CONTRIBUTING.md for the full policy rationale.*" + + gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)" + + - name: Fail on unbounded deps + if: steps.bounds.outputs.found == 'true' + run: | + echo "::error::PyPI dependencies without upper bounds detected. 
Add =floor,=0.28.1,<1"` | +| Git URL | Commit SHA | `git+https://...@<40-char-sha>` | +| GitHub Actions | Commit SHA + comment | `uses: actions/checkout@ # v4` | +| CI-only pip | `==exact` | `pyyaml==6.0.2` | + +**When adding a new dependency to `pyproject.toml`:** +1. Pin to `>=current_version,=1.5.0,<2`). +2. For pre-1.0 packages, use `<0.(current_minor + 2)` (e.g. `>=0.29,<0.32`). +3. Never commit a bare `>=X.Y.Z` without a ceiling — CI and reviewers will reject it. +4. Run `uv lock` to regenerate `uv.lock` with hashes. + +Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI). + +--- + ## Adding Configuration ### config.yaml options: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9cbc26112f6..36b1e9df2d5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -800,6 +800,47 @@ Hermes has terminal access. Security matters. If your PR affects security, note it explicitly in the description. +### Dependency pinning policy (supply chain hardening) + +After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules: + +| Source type | Required treatment | Rationale | +|---|---|---| +| **PyPI package** | `>=floor, # vX.Y.Z` | +| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. | + +**Every new PyPI dependency in a PR must have a `=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review. + +**How to determine the ceiling:** +- If the package is at version `1.x.y`, use `<2`. +- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 2)` — e.g. if current is `0.29.x`, use `<0.32`. 
This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it. +- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion. + +**Examples:** +```toml +# ✅ Correct — post-1.0 +"openai>=2.21.0,<3" +"pydantic>=2.12.5,<3" + +# ✅ Correct — pre-1.0 (tight minor window) +"asyncpg>=0.29,<0.32" +"aiosqlite>=0.20,<0.23" +"hindsight-client>=0.4.22,<0.5" + +# ❌ Rejected — no upper bound +"some-package>=1.2.3" + +# ❌ Rejected — too tight (blocks legitimate patches) +"some-package==1.2.3" + +# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions) +"some-package>=0.20,<1" +``` + +**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI). + --- ## Pull Request Process From 9329e06696c968b7a960541d0ee0167df6742f21 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:33:13 -0700 Subject: [PATCH 017/218] feat(image-gen): actionable setup message when no FAL backend is reachable (#26222) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the in-tree FAL path has no API key (and no managed gateway), the handler used to return a bare 'FAL_KEY environment variable not set' error. Users had no idea where to get a key, that a managed Nous gateway exists, or that plugin-registered providers are an option. Now `image_generate_tool` returns a structured multi-line message: - signup link (https://fal.ai) - managed-gateway status (if Nous tools are enabled) - pointer to `hermes tools` / `hermes plugins list` for alternate backends, so users on a stale `image_gen.provider` know where to look The schema is untouched — `check_fn` still gates the tool out of the schema when no backend is reachable at startup, consistent with every other conditional tool. 
This patch fixes the call-time failure modes: managed-gateway 5xx, plugin provider disappearing mid-session, etc. Inspired by #2546 / @Mibayy. The PR was ~5700 commits stale against the new plugin-aware image_gen architecture, so this is a forward port of the actionable-error idea rather than a cherry-pick. Closes #2543 Co-authored-by: Mibayy --- tests/tools/test_image_generation_env.py | 59 ++++++++++++++++++++++++ tools/image_generation_tool.py | 41 ++++++++++++++-- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/tests/tools/test_image_generation_env.py b/tests/tools/test_image_generation_env.py index fc4e6553346..56c9741617f 100644 --- a/tests/tools/test_image_generation_env.py +++ b/tests/tools/test_image_generation_env.py @@ -37,3 +37,62 @@ def test_fal_key_empty_is_unset(monkeypatch): ) assert image_generation_tool.check_fal_api_key() is False + + +# --------------------------------------------------------------------------- +# Actionable setup message when no FAL backend is reachable. +# Regression for the silent-drop UX gap described in issue #2543. +# --------------------------------------------------------------------------- + + +def test_no_backend_message_mentions_fal_signup_and_plugins(monkeypatch): + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: False + ) + + msg = image_generation_tool._build_no_backend_setup_message() + + assert "FAL_KEY" in msg + assert "https://fal.ai" in msg + # Plugin pointer so users on a stale image_gen.provider know where to look. 
+ assert "hermes tools" in msg or "hermes plugins" in msg + + +def test_no_backend_message_mentions_managed_gateway_when_enabled(monkeypatch): + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: True + ) + + msg = image_generation_tool._build_no_backend_setup_message() + + assert "managed FAL gateway" in msg + assert "Nous account" in msg or "hermes setup" in msg + + +def test_image_generate_tool_returns_actionable_error_when_no_backend(monkeypatch): + """End-to-end: handler must surface the actionable message, not a bare string.""" + import json + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "fal_key_is_configured", lambda: False + ) + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: False + ) + + result = json.loads( + image_generation_tool.image_generate_tool(prompt="a cat") + ) + + assert result["success"] is False + assert "https://fal.ai" in result["error"] + assert "FAL_KEY" in result["error"] diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index c496166ec98..3d171f093c9 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -698,10 +698,7 @@ def image_generate_tool( raise ValueError("Prompt is required and must be a non-empty string") if not (fal_key_is_configured() or _resolve_managed_fal_gateway()): - message = "FAL_KEY environment variable not set" - if managed_nous_tools_enabled(): - message += " and managed FAL gateway is unavailable" - raise ValueError(message) + raise ValueError(_build_no_backend_setup_message()) aspect_lc = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip() if aspect_lc not in VALID_ASPECT_RATIOS: @@ -811,6 +808,42 @@ def check_fal_api_key() -> bool: return bool(fal_key_is_configured() or _resolve_managed_fal_gateway()) 
+def _build_no_backend_setup_message() -> str: + """Build an actionable error string when no FAL backend is reachable. + + Used by the in-tree FAL path. Mentions: + - FAL_KEY signup link + - managed-gateway status (if Nous tools are enabled) + - plugin alternative pointer (so users on a stale ``image_gen.provider`` + know the registry exists and how to inspect it) + """ + lines = ["Image generation is unavailable in this environment.", ""] + lines.append("Missing requirements:") + if managed_nous_tools_enabled(): + lines.append( + " - FAL_KEY is not set and the managed FAL gateway is unreachable" + ) + else: + lines.append(" - FAL_KEY environment variable is not set") + lines.append("") + lines.append("To enable image generation, do one of:") + lines.append( + " 1. Get a free API key at https://fal.ai and set " + "FAL_KEY= (then restart the session)" + ) + if managed_nous_tools_enabled(): + lines.append( + " 2. Sign in to a Nous account that has the managed FAL " + "gateway enabled (`hermes setup`)" + ) + lines.append( + " 3. Configure a different image_gen provider via `hermes tools` " + "→ Image Generation (run `hermes plugins list` to see installed " + "backends)" + ) + return "\n".join(lines) + + def check_image_generation_requirements() -> bool: """True if any image gen backend is available. From 05d9f641c06043a538ba03e3ed008a97403fcc3b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:34:15 -0700 Subject: [PATCH 018/218] docs(cron): worked recipes for the wakeAgent pre-run gate (#26229) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three pre-run gate recipes to the cron docs: - file-change gate (stat + mtime + state file) - external-flag gate (file presence) - SQL-count gate (user's own database, not state.db) These are the use cases @iankar8 proposed adding as a parallel 'trigger' subsystem in #2654. 
The existing `script` + `wakeAgent` gate already covers all three at $0 — this lands the patterns as documentation so users can find them, instead of adding a second gating mechanism to the cron subsystem. --- website/docs/user-guide/features/cron.md | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index c2c67df8a2a..9a14e6dcd1e 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -522,6 +522,86 @@ print(json.dumps({"wakeAgent": True, "context": {"new_issues": latest - prev}})) When `wakeAgent` is omitted, the default is `true` (wake the agent as usual). +#### Recipes: cheap pre-run gates + +The `wakeAgent` gate gives you a $0 way to decide whether a scheduled job should spend any LLM tokens at all. Three patterns cover most use cases. + +**File-change gate** — only run when a watched file has changed since the last time the gate woke the agent. The scheduler records each job's `last_run_at`, but the simplest approach is for the script to keep the last-seen mtime in its own state file and compare it against the file's current mtime (`stat -c %Y` is GNU coreutils; on macOS/BSD use `stat -f %m`). + +```bash +#!/bin/bash +# ~/.hermes/scripts/feed-changed.sh +FEED="$HOME/data/feed.json" +STATE="$HOME/.hermes/scripts/.feed-changed.last" +test -f "$FEED" || { echo '{"wakeAgent": false}'; exit 0; } +mtime=$(stat -c %Y "$FEED") +last=$(cat "$STATE" 2>/dev/null || echo 0) +if [ "$mtime" -le "$last" ]; then + echo '{"wakeAgent": false}' +else + echo "$mtime" > "$STATE" + echo '{"wakeAgent": true}' +fi +``` + +```text +cronjob(action="create", name="process-feed", + schedule="every 30m", + script="feed-changed.sh", + prompt="A new ~/data/feed.json has landed. Summarize what changed.") +``` + +**External-flag gate** — only run when some other process has signalled readiness (e.g. a deploy hook drops a file, a CI job sets a value in your state store).
+ +```bash +#!/bin/bash +# ~/.hermes/scripts/flag-ready.sh +if test -f /tmp/new-data-ready; then + rm -f /tmp/new-data-ready + echo '{"wakeAgent": true}' +else + echo '{"wakeAgent": false}' +fi +``` + +```text +cronjob(action="create", name="nightly-analysis", + schedule="0 9 * * *", + script="flag-ready.sh", + prompt="Run the nightly analysis over today's batch.") +``` + +**SQL-count gate** — only run when there are new rows to process in your own database. The script can also pass the count through to the agent via `context`, so the agent knows how much it's looking at without re-querying. + +```python +#!/usr/bin/env python +# ~/.hermes/scripts/new-rows.py +import json, sqlite3 +conn = sqlite3.connect("/home/me/data/app.db") +n = conn.execute( + "SELECT COUNT(*) FROM messages WHERE ts > strftime('%s','now','-2 hours')" +).fetchone()[0] +if n < 1: + print(json.dumps({"wakeAgent": False})) +else: + print(json.dumps({"wakeAgent": True, "context": {"new_rows": n}})) +``` + +```text +cronjob(action="create", name="summarize-new-msgs", + schedule="every 2h", + script="new-rows.py", + prompt="Summarize the new messages from the last 2 hours.") +``` + +The same pattern works for any data source you can query from a script — Postgres, an HTTP API, your own state store — without baking a SQL evaluator into the cron subsystem. + +:::tip +Hermes's own `~/.hermes/state.db` is an internal schema that changes between releases. Don't query it from a pre-run gate — point at your own database or feed instead. +::: + +Credit: this recipe set was prompted by @iankar8's exploration in [#2654](https://github.com/NousResearch/hermes-agent/pull/2654), which proposed adding sql/file/command triggers as a parallel mechanism. The `script` + `wakeAgent` gate already covers all three cases at $0, so the work landed as documentation instead. 
+ ### Chaining jobs: `context_from` A cron job can consume the most recent successful output of one or more other jobs by listing their names (or IDs) in `context_from`: From 6682f91b80bab57c65435ae6b5cdc791334ed620 Mon Sep 17 00:00:00 2001 From: buntingszn <108427749+buntingszn@users.noreply.github.com> Date: Fri, 15 May 2026 01:33:12 -0700 Subject: [PATCH 019/218] feat(cron): support name-based lookup for job operations Cron mutation operations (run/pause/resume/remove) and 'hermes cron edit' now accept a job name in addition to the hex ID, with case-insensitive matching. Before this, 'hermes cron run my_job_name' died with 'Job with ID my_job_name not found' and forced the user to look up the hex ID first. The original PR matched by name but silently picked the first match when two jobs shared a name. This version refuses to act on an ambiguous name and surfaces every matching job (id, name, schedule, next_run_at) so the caller can pick a specific ID. - cron/jobs.py: - get_job() stays ID-only (preserves existing call-site semantics for web_server/api_server/curator/scheduler/test code that always passes real IDs). - resolve_job_ref() is the new name-or-ID resolver, used by pause/ resume/trigger/remove_job. Exact ID match wins over a name match even if a different job's name happens to equal that ID. Ambiguous name match raises AmbiguousJobReference with all candidate IDs. - tools/cronjob_tools.py: dispatch site uses resolve_job_ref, surfaces ambiguous matches as a structured error with the matching IDs. - hermes_cli/cron.py: 'cron edit' uses resolve_job_ref so editing by name works and ambiguous names are reported with IDs. - tests/cron/test_jobs.py: new TestResolveJobRef covering ID match, case-insensitive name match, ID-wins-over-name, ambiguous refusal, and that pause/resume/trigger/remove all refuse on ambiguity. 
Closes #2627 --- cron/jobs.py | 67 +++++++++++++++++++++++++------ hermes_cli/cron.py | 10 ++++- tests/cron/test_jobs.py | 87 +++++++++++++++++++++++++++++++++++++++++ tools/cronjob_tools.py | 28 +++++++++++-- 4 files changed, 176 insertions(+), 16 deletions(-) diff --git a/cron/jobs.py b/cron/jobs.py index 6b3bc0e66f9..c5da32d44d5 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -645,6 +645,44 @@ def get_job(job_id: str) -> Optional[Dict[str, Any]]: return None +class AmbiguousJobReference(LookupError): + """Raised when a job name matches more than one job.""" + + def __init__(self, ref: str, matches: List[Dict[str, Any]]): + self.ref = ref + self.matches = matches + ids = ", ".join(m["id"] for m in matches) + super().__init__( + f"Job name '{ref}' is ambiguous — matches {len(matches)} jobs: {ids}. " + f"Use the job ID instead." + ) + + +def resolve_job_ref(ref: str) -> Optional[Dict[str, Any]]: + """Resolve a job reference (ID or name) to a job record. + + - Exact ID match wins (works even if a different job's name equals this ID). + - Otherwise, case-insensitive name match. + - If a name matches more than one job, raises AmbiguousJobReference so the + caller can surface the matching IDs rather than silently picking one. 
+ """ + if not ref: + return None + jobs = load_jobs() + for job in jobs: + if job["id"] == ref: + return _normalize_job_record(job) + ref_lower = ref.lower() + name_matches = [j for j in jobs if (j.get("name") or "").lower() == ref_lower] + if not name_matches: + return None + if len(name_matches) > 1: + raise AmbiguousJobReference( + ref, [_normalize_job_record(j) for j in name_matches] + ) + return _normalize_job_record(name_matches[0]) + + def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]: """List all jobs, optionally including disabled ones.""" jobs = [_normalize_job_record(j) for j in load_jobs()] @@ -702,9 +740,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Pause a job without deleting it.""" + """Pause a job without deleting it. Accepts a job ID or name.""" + job = resolve_job_ref(job_id) + if not job: + return None return update_job( - job_id, + job["id"], { "enabled": False, "state": "paused", @@ -715,14 +756,14 @@ def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, A def resume_job(job_id: str) -> Optional[Dict[str, Any]]: - """Resume a paused job and compute the next future run from now.""" - job = get_job(job_id) + """Resume a paused job and compute the next future run from now. Accepts a job ID or name.""" + job = resolve_job_ref(job_id) if not job: return None next_run_at = compute_next_run(job["schedule"]) return update_job( - job_id, + job["id"], { "enabled": True, "state": "scheduled", @@ -734,12 +775,12 @@ def resume_job(job_id: str) -> Optional[Dict[str, Any]]: def trigger_job(job_id: str) -> Optional[Dict[str, Any]]: - """Schedule a job to run on the next scheduler tick.""" - job = get_job(job_id) + """Schedule a job to run on the next scheduler tick. 
Accepts a job ID or name.""" + job = resolve_job_ref(job_id) if not job: return None return update_job( - job_id, + job["id"], { "enabled": True, "state": "scheduled", @@ -751,14 +792,18 @@ def trigger_job(job_id: str) -> Optional[Dict[str, Any]]: def remove_job(job_id: str) -> bool: - """Remove a job by ID.""" + """Remove a job by ID or name.""" + job = resolve_job_ref(job_id) + if not job: + return False + canonical_id = job["id"] jobs = load_jobs() original_len = len(jobs) - jobs = [j for j in jobs if j["id"] != job_id] + jobs = [j for j in jobs if j["id"] != canonical_id] if len(jobs) < original_len: save_jobs(jobs) # Clean up output directory to prevent orphaned dirs accumulating - job_output_dir = OUTPUT_DIR / job_id + job_output_dir = OUTPUT_DIR / canonical_id if job_output_dir.exists(): shutil.rmtree(job_output_dir) return True diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index adf4f0c0927..7bff9c6b87b 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -196,9 +196,15 @@ def cron_create(args): def cron_edit(args): - from cron.jobs import get_job + from cron.jobs import AmbiguousJobReference, resolve_job_ref - job = get_job(args.job_id) + try: + job = resolve_job_ref(args.job_id) + except AmbiguousJobReference as exc: + print(color(str(exc), Colors.RED)) + for m in exc.matches: + print(f" {m['id']} (name: {m.get('name')!r})") + return 1 if not job: print(color(f"Job not found: {args.job_id}", Colors.RED)) return 1 diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index af42ca444b2..16c56cd6220 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -321,6 +321,93 @@ class TestPauseResumeJob: assert resumed["paused_reason"] is None +class TestResolveJobRef: + """Name-based job lookup for CLI/tool callers (PR #2627, @buntingszn).""" + + def test_resolve_by_exact_id(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="alpha") + assert 
resolve_job_ref(job["id"])["id"] == job["id"] + + def test_resolve_by_name(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="alpha") + assert resolve_job_ref("alpha")["id"] == job["id"] + + def test_resolve_by_name_case_insensitive(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="MyJob") + assert resolve_job_ref("myjob")["id"] == job["id"] + assert resolve_job_ref("MYJOB")["id"] == job["id"] + + def test_resolve_returns_none_when_not_found(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + create_job(prompt="A", schedule="1h", name="alpha") + assert resolve_job_ref("does-not-exist") is None + assert resolve_job_ref("") is None + + def test_resolve_id_wins_over_name(self, tmp_cron_dir): + """If a job's name happens to equal another job's ID, ID match wins.""" + from cron.jobs import resolve_job_ref + + j1 = create_job(prompt="A", schedule="1h") + # Create a second job whose name is j1's ID + j2 = create_job(prompt="B", schedule="1h", name=j1["id"]) + # Looking up j1["id"] must return j1, not the colliding-name job j2 + assert resolve_job_ref(j1["id"])["id"] == j1["id"] + assert resolve_job_ref(j1["id"])["id"] != j2["id"] + + def test_resolve_ambiguous_name_raises(self, tmp_cron_dir): + """Two jobs sharing a name → refuse to pick, surface both IDs.""" + from cron.jobs import AmbiguousJobReference, resolve_job_ref + + j1 = create_job(prompt="A", schedule="1h", name="dup") + j2 = create_job(prompt="B", schedule="1h", name="dup") + with pytest.raises(AmbiguousJobReference) as exc_info: + resolve_job_ref("dup") + ids = {m["id"] for m in exc_info.value.matches} + assert ids == {j1["id"], j2["id"]} + # Error message mentions both IDs so the user can pick one + assert j1["id"] in str(exc_info.value) + assert j2["id"] in str(exc_info.value) + + def test_trigger_by_name(self, tmp_cron_dir): + from cron.jobs import trigger_job + + job 
= create_job(prompt="A", schedule="1h", name="alpha") + result = trigger_job("alpha") + assert result is not None + assert result["id"] == job["id"] + + def test_pause_by_name(self, tmp_cron_dir): + job = create_job(prompt="A", schedule="1h", name="alpha") + result = pause_job("alpha", reason="manual") + assert result is not None + assert result["id"] == job["id"] + assert result["state"] == "paused" + + def test_remove_by_name(self, tmp_cron_dir): + job = create_job(prompt="A", schedule="1h", name="alpha") + assert remove_job("alpha") is True + assert get_job(job["id"]) is None + + def test_mutations_refuse_ambiguous_name(self, tmp_cron_dir): + """pause/resume/trigger/remove must refuse to act on an ambiguous name.""" + from cron.jobs import AmbiguousJobReference, trigger_job + + create_job(prompt="A", schedule="1h", name="dup") + create_job(prompt="B", schedule="1h", name="dup") + for fn in (pause_job, resume_job, trigger_job): + with pytest.raises(AmbiguousJobReference): + fn("dup") + with pytest.raises(AmbiguousJobReference): + remove_job("dup") + + class TestMarkJobRun: def test_increments_completed(self, tmp_cron_dir): job = create_job(prompt="Test", schedule="every 1h") diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index e63b60047ac..3c29431484d 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -21,12 +21,14 @@ logger = logging.getLogger(__name__) sys.path.insert(0, str(Path(__file__).parent.parent)) from cron.jobs import ( + AmbiguousJobReference, create_job, get_job, list_jobs, parse_schedule, pause_job, remove_job, + resolve_job_ref, resume_job, trigger_job, update_job, @@ -393,12 +395,32 @@ def cronjob( if not job_id: return tool_error(f"job_id is required for action '{normalized}'", success=False) - job = get_job(job_id) - if not job: + try: + job = resolve_job_ref(job_id) + except AmbiguousJobReference as exc: return json.dumps( - {"success": False, "error": f"Job with ID '{job_id}' not found. 
Use cronjob(action='list') to inspect jobs."}, + { + "success": False, + "error": str(exc), + "matches": [ + { + "id": m["id"], + "name": m.get("name"), + "schedule": m.get("schedule_display"), + "next_run_at": m.get("next_run_at"), + } + for m in exc.matches + ], + }, indent=2, ) + if not job: + return json.dumps( + {"success": False, "error": f"Job with ID or name '{job_id}' not found. Use cronjob(action='list') to inspect jobs."}, + indent=2, + ) + # Resolve to canonical ID (supports name-based lookup) + job_id = job["id"] if normalized == "remove": removed = remove_job(job_id) From 9f57f2286d9fb52419c69ea64c3119f734b35ef1 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:33:16 -0700 Subject: [PATCH 020/218] chore(release): add AUTHOR_MAP entry for buntingszn --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 10d67f3e708..b0e1fda9686 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -64,6 +64,7 @@ AUTHOR_MAP = { "mr@shu.io": "mrshu", "adam.manning@gmail.com": "am423", "buraysandro9@gmail.com": "ygd58", + "108427749+buntingszn@users.noreply.github.com": "buntingszn", "yanglongwei06@gmail.com": "Alex-yang00", "teknium@nousresearch.com": "teknium1", "piyushvp1@gmail.com": "thelumiereguy", From 85782a4ed7f2329957c4af9a4243acb51c3cf921 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:36:54 -0700 Subject: [PATCH 021/218] feat(acp): hermes acp --setup-browser bootstraps browser tools for registry installs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Zed ACP Registry path (uvx --from 'hermes-agent[acp]==X' hermes-acp) gets a Python-only install. Browser tools depend on the agent-browser npm package + Chromium, neither of which are in the wheel. 
Without an explicit bootstrap, registry users have no path to working browser tools. Ship a bundled, idempotent bootstrap script (Linux/macOS bash + Windows PowerShell) inside acp_adapter/bootstrap/ as wheel package-data. New entry points: hermes acp --setup-browser # interactive; prompts before Chromium download hermes acp --setup-browser --yes # non-interactive hermes-acp --setup-browser The terminal-auth flow (hermes acp --setup) also offers the browser bootstrap as a follow-up after model selection, so first-run registry users get the option without knowing the flag exists. Key design choices: - npm install -g --prefix $NODE_PREFIX so we never need sudo. System Node on PATH is respected; only the install target is redirected to the user-writable Hermes-managed Node prefix. - tools/browser_tool.py::_browser_candidate_path_dirs() already walks $HERMES_HOME/node/bin, so installed binaries are discovered with no agent-side code change. - System Chrome/Chromium detection short-circuits the ~400 MB Playwright download when a suitable browser already exists. - Bash + PowerShell live as ONE copy each under acp_adapter/bootstrap/. Not duplicated under scripts/. install.sh and install.ps1 keep their inline browser blocks for the source-checkout path. E2E validated end-to-end: bash bootstrap_browser_tools.sh --skip-chromium → installs agent-browser into ~/.hermes/node/bin/ tools.browser_tool._find_agent_browser() → returns the installed path check_browser_requirements() → returns True (browser tools register) Tests: - tests/acp/test_entry.py: 11 tests covering --setup-browser dispatch (linux + windows + --yes forwarding + failure propagation), the terminal-auth follow-up prompt path, and a package-data wheel-shipping assertion that catches any future pyproject.toml regression. Docs: website/docs/user-guide/features/acp.md gains a 'Browser tools (optional)' subsection with the two-line install + what-it-does. 
--- acp_adapter/bootstrap/__init__.py | 0 .../bootstrap/bootstrap_browser_tools.ps1 | 288 +++++++++++++ .../bootstrap/bootstrap_browser_tools.sh | 399 ++++++++++++++++++ acp_adapter/entry.py | 88 ++++ hermes_cli/main.py | 18 + pyproject.toml | 3 +- tests/acp/test_entry.py | 147 ++++++- website/docs/user-guide/features/acp.md | 21 + 8 files changed, 961 insertions(+), 3 deletions(-) create mode 100644 acp_adapter/bootstrap/__init__.py create mode 100644 acp_adapter/bootstrap/bootstrap_browser_tools.ps1 create mode 100755 acp_adapter/bootstrap/bootstrap_browser_tools.sh diff --git a/acp_adapter/bootstrap/__init__.py b/acp_adapter/bootstrap/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 new file mode 100644 index 00000000000..f840fd2d559 --- /dev/null +++ b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 @@ -0,0 +1,288 @@ +# bootstrap_browser_tools.ps1 — install agent-browser + Playwright Chromium +# into ~/.hermes/node/ for use by Hermes Agent's browser tools on Windows. +# +# Targets the registry-install path: users who got Hermes via +# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone, +# so the install.ps1 `npm install`-in-repo flow doesn't apply. This script +# is a self-contained, idempotent slice of install.ps1's browser block. +# +# Usage: +# .\bootstrap_browser_tools.ps1 # use defaults +# .\bootstrap_browser_tools.ps1 -Yes # accept Chromium download +# .\bootstrap_browser_tools.ps1 -SkipChromium # Node + agent-browser only +# +# Idempotent: re-running this is safe and fast. 
+ +[CmdletBinding()] +param( + [switch]$Yes, + [switch]$SkipChromium +) + +$ErrorActionPreference = "Stop" +$NodeVersion = "22" + +# ───────────────────────────────────────────────────────────────────────── +# Logging +# ───────────────────────────────────────────────────────────────────────── + +function Write-Info { param([string]$msg) Write-Host "[*] $msg" -ForegroundColor Cyan } +function Write-Success { param([string]$msg) Write-Host "[+] $msg" -ForegroundColor Green } +function Write-Warn { param([string]$msg) Write-Host "[!] $msg" -ForegroundColor Yellow } +function Write-Err { param([string]$msg) Write-Host "[x] $msg" -ForegroundColor Red } + +# ───────────────────────────────────────────────────────────────────────── +# Paths +# ───────────────────────────────────────────────────────────────────────── + +$HermesHome = $env:HERMES_HOME +if (-not $HermesHome) { + $HermesHome = Join-Path $env:USERPROFILE ".hermes" +} +$NodePrefix = Join-Path $HermesHome "node" + +# ───────────────────────────────────────────────────────────────────────── +# Step 1: Node.js +# ───────────────────────────────────────────────────────────────────────── + +function Resolve-NpmExe { + # Same gotcha as install.ps1: prefer npm.cmd over npm.ps1 so the + # PowerShell execution policy doesn't block us. + $cmd = Get-Command npm -ErrorAction SilentlyContinue + if (-not $cmd) { return $null } + $npmExe = $cmd.Source + if ($npmExe -like "*.ps1") { + $sibling = Join-Path (Split-Path $npmExe -Parent) "npm.cmd" + if (Test-Path $sibling) { return $sibling } + } + return $npmExe +} + +function Resolve-NpxExe { + $cmd = Get-Command npx -ErrorAction SilentlyContinue + if (-not $cmd) { return $null } + $npxExe = $cmd.Source + if ($npxExe -like "*.ps1") { + $sibling = Join-Path (Split-Path $npxExe -Parent) "npx.cmd" + if (Test-Path $sibling) { return $sibling } + } + return $npxExe +} + +function Ensure-Node { + # System Node on PATH? 
+ $sysNode = Get-Command node -ErrorAction SilentlyContinue + if ($sysNode) { + try { + $v = & $sysNode.Source --version + $major = [int]($v -replace '^v(\d+).*', '$1') + if ($major -ge 20) { + Write-Success "Node.js $v found on PATH" + return + } + Write-Warn "Node.js $v is older than v20 — installing managed Node." + } catch { + Write-Warn "Failed to query Node version: $_" + } + } + + # Hermes-managed Node? + $managedNode = Join-Path $NodePrefix "node.exe" + if (Test-Path $managedNode) { + $v = & $managedNode --version + Write-Success "Node.js $v found (Hermes-managed at $NodePrefix)" + # Prepend to current-process PATH so subsequent npm/npx calls find it. + $env:PATH = "$NodePrefix;$env:PATH" + return + } + + Write-Info "Installing Node.js $NodeVersion LTS into $NodePrefix ..." + + $arch = if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" } + $indexUrl = "https://nodejs.org/dist/latest-v${NodeVersion}.x/" + + try { + $indexPage = Invoke-WebRequest -Uri $indexUrl -UseBasicParsing + $matches = [regex]::Matches($indexPage.Content, "node-v${NodeVersion}\.\d+\.\d+-win-${arch}\.zip") + if ($matches.Count -eq 0) { + Write-Err "Could not locate Node.js $NodeVersion zip for win-$arch" + throw "no tarball" + } + $zipName = $matches[0].Value + $zipUrl = "$indexUrl$zipName" + + $tmpDir = Join-Path $env:TEMP "hermes-node-$([guid]::NewGuid().ToString('N'))" + New-Item -ItemType Directory -Force -Path $tmpDir | Out-Null + $zipPath = Join-Path $tmpDir $zipName + + Write-Info "Downloading $zipName ..." 
+ Invoke-WebRequest -Uri $zipUrl -OutFile $zipPath -UseBasicParsing + + Expand-Archive -Path $zipPath -DestinationPath $tmpDir -Force + $extracted = Get-ChildItem -Path $tmpDir -Directory | Where-Object { $_.Name -like "node-v*" } | Select-Object -First 1 + + if (-not $extracted) { Write-Err "Node.js extraction failed"; throw "extract" } + + if (Test-Path $NodePrefix) { Remove-Item -Recurse -Force $NodePrefix } + New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null + Move-Item -Path $extracted.FullName -Destination $NodePrefix + + Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue + + $env:PATH = "$NodePrefix;$env:PATH" + $v = & "$NodePrefix\node.exe" --version + Write-Success "Node.js $v installed to $NodePrefix" + } catch { + Write-Err "Node.js install failed: $_" + Write-Info "Install Node 20+ manually from https://nodejs.org/en/download/ and re-run." + throw + } +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 2: agent-browser +# ───────────────────────────────────────────────────────────────────────── + +function Ensure-AgentBrowser { + $npmExe = Resolve-NpmExe + if (-not $npmExe) { + Write-Err "npm not on PATH after Node install — aborting" + throw "npm missing" + } + + # Already installed? + $existing = Get-Command agent-browser -ErrorAction SilentlyContinue + if ($existing) { + Write-Success "agent-browser already installed at $($existing.Source)" + return + } + + # When the user has system Node (winget / installer-based), `npm install + # -g` writes to a directory that may require admin rights. Force the + # prefix to the user-writable Hermes-managed Node directory so we never + # need elevation and the agent can always find the result. Mirrors the + # bash bootstrap's `--prefix $NODE_PREFIX` strategy. + New-Item -ItemType Directory -Force -Path $NodePrefix | Out-Null + + Write-Info "Installing agent-browser (npm, prefix=$NodePrefix)..." 
+ & $npmExe install -g --prefix $NodePrefix --silent ` + "agent-browser@^0.26.0" "@askjo/camofox-browser@^1.5.2" + if ($LASTEXITCODE -ne 0) { + Write-Err "npm install -g agent-browser failed (exit $LASTEXITCODE)" + throw "npm install" + } + + # Windows npm global installs drop shims at $NodePrefix\ root (not bin/). + # Prepend to PATH so any subsequent npx call resolves them. + $env:PATH = "$NodePrefix;$env:PATH" + + Write-Success "agent-browser installed to $NodePrefix" +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 3: Playwright Chromium +# ───────────────────────────────────────────────────────────────────────── + +function Find-SystemBrowser { + $candidates = @( + "C:\Program Files\Google\Chrome\Application\chrome.exe", + "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", + "C:\Program Files\Chromium\Application\chromium.exe", + "${env:LOCALAPPDATA}\Google\Chrome\Application\chrome.exe", + "${env:LOCALAPPDATA}\Chromium\Application\chromium.exe" + ) + foreach ($p in $candidates) { + if (Test-Path $p) { return $p } + } + # Edge — Chromium-based, agent-browser can use it + foreach ($p in @( + "C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe", + "C:\Program Files\Microsoft\Edge\Application\msedge.exe" + )) { + if (Test-Path $p) { return $p } + } + return $null +} + +function Write-BrowserEnv { + param([string]$BrowserPath) + $envFile = Join-Path $HermesHome ".env" + New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null + if (Test-Path $envFile) { + $existing = Get-Content $envFile -Raw -ErrorAction SilentlyContinue + if ($existing -and ($existing -match "(?m)^AGENT_BROWSER_EXECUTABLE_PATH=")) { + return + } + } + Add-Content -Path $envFile -Value "" + Add-Content -Path $envFile -Value "# Hermes Agent browser tools — use the system Chrome/Chromium/Edge binary." 
+ Add-Content -Path $envFile -Value "AGENT_BROWSER_EXECUTABLE_PATH=$BrowserPath" + Write-Success "Configured browser tools to use $BrowserPath" +} + +function Confirm-ChromiumDownload { + if ($Yes) { return $true } + if (-not [Environment]::UserInteractive) { + Write-Warn "Non-interactive shell — skipping Chromium prompt." + Write-Info "Re-run with -Yes to install Chromium (~400 MB download)." + return $false + } + $reply = Read-Host "Install Playwright Chromium (~400 MB download)? [y/N]" + return ($reply -match "^(y|yes)$") +} + +function Ensure-Chromium { + if ($SkipChromium) { + Write-Info "Skipping Chromium install (-SkipChromium)" + return + } + + # agent-browser on Windows expects a Playwright-managed Chromium under + # %LOCALAPPDATA%\ms-playwright. The system-browser shortcut from the + # Linux/macOS path doesn't apply the same way on Windows — Playwright's + # default launch path won't pick up a stock Chrome install without an + # explicit AGENT_BROWSER_EXECUTABLE_PATH. We still offer it as a + # fallback when the user doesn't want the download. + + if (-not (Confirm-ChromiumDownload)) { + $sys = Find-SystemBrowser + if ($sys) { + Write-Info "Using system browser at $sys (Chromium download skipped)." + Write-BrowserEnv -BrowserPath $sys + } else { + Write-Info "Chromium install skipped. Browser tools won't launch until" + Write-Info "Chromium is installed or AGENT_BROWSER_EXECUTABLE_PATH is set." + } + return + } + + $npxExe = Resolve-NpxExe + if (-not $npxExe) { + Write-Err "npx not on PATH — cannot install Playwright Chromium" + throw "npx missing" + } + + Write-Info "Installing Playwright Chromium (~400 MB) ..." 
+ & $npxExe --yes playwright install chromium + if ($LASTEXITCODE -ne 0) { + Write-Err "Playwright Chromium install failed (exit $LASTEXITCODE)" + Write-Info "Try again later: npx --yes playwright install chromium" + throw "playwright" + } + Write-Success "Playwright Chromium installed" +} + +# ───────────────────────────────────────────────────────────────────────── +# Main +# ───────────────────────────────────────────────────────────────────────── + +Write-Info "Hermes Agent: bootstrapping browser tools" +Write-Info " HERMES_HOME = $HermesHome" +Write-Info " OS = Windows" + +Ensure-Node +Ensure-AgentBrowser +Ensure-Chromium + +Write-Success "Browser tools setup complete." +Write-Info "Hermes Agent will pick up agent-browser from $NodePrefix on next launch." diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.sh b/acp_adapter/bootstrap/bootstrap_browser_tools.sh new file mode 100755 index 00000000000..9981069a6af --- /dev/null +++ b/acp_adapter/bootstrap/bootstrap_browser_tools.sh @@ -0,0 +1,399 @@ +#!/usr/bin/env bash +# +# bootstrap_browser_tools.sh — install agent-browser + Playwright Chromium +# into ~/.hermes/node/ for use by Hermes Agent's browser tools. +# +# Targets the registry-install path: users who got Hermes via +# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone, +# so the install.sh `npm install`-in-repo flow doesn't apply. This script +# is a self-contained, idempotent slice of install.sh's browser block — +# safe to run from `hermes-acp --setup-browser`, from a fresh terminal, +# or from install.sh itself (it's a no-op when everything is already in place). +# +# Usage: +# bootstrap_browser_tools.sh # use defaults +# bootstrap_browser_tools.sh --yes # accept the ~400MB Chromium download +# bootstrap_browser_tools.sh --skip-chromium # only install Node + agent-browser +# HERMES_HOME=/custom/path bootstrap_browser_tools.sh +# +# Idempotent: re-running this is safe and fast. 
Each step checks whether +# the work is already done. + +set -euo pipefail + +# ───────────────────────────────────────────────────────────────────────── +# Config +# ───────────────────────────────────────────────────────────────────────── + +NODE_VERSION="22" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +NODE_PREFIX="$HERMES_HOME/node" + +SKIP_CHROMIUM=false +ASSUME_YES=false + +# ───────────────────────────────────────────────────────────────────────── +# Logging +# ───────────────────────────────────────────────────────────────────────── + +if [ -t 1 ]; then + C_GREEN='\033[0;32m' + C_YELLOW='\033[0;33m' + C_BLUE='\033[0;34m' + C_RED='\033[0;31m' + C_RESET='\033[0m' +else + C_GREEN='' ; C_YELLOW='' ; C_BLUE='' ; C_RED='' ; C_RESET='' +fi + +log_info() { printf "${C_BLUE}[*]${C_RESET} %s\n" "$*"; } +log_success() { printf "${C_GREEN}[✓]${C_RESET} %s\n" "$*"; } +log_warn() { printf "${C_YELLOW}[!]${C_RESET} %s\n" "$*" >&2; } +log_error() { printf "${C_RED}[✗]${C_RESET} %s\n" "$*" >&2; } + +# ───────────────────────────────────────────────────────────────────────── +# Arg parsing +# ───────────────────────────────────────────────────────────────────────── + +while [ $# -gt 0 ]; do + case "$1" in + --skip-chromium) SKIP_CHROMIUM=true ;; + --yes|-y) ASSUME_YES=true ;; + -h|--help) + cat </dev/null 2>&1; then + local found_ver major + found_ver=$(node --version 2>/dev/null) + major=$(echo "$found_ver" | sed -E 's/^v([0-9]+).*/\1/') + if [ -n "$major" ] && [ "$major" -ge 20 ]; then + log_success "Node.js $found_ver found on PATH" + return 0 + fi + log_warn "Node.js $found_ver is older than v20 — installing managed Node." + fi + + if [ -x "$NODE_PREFIX/bin/node" ]; then + local found_ver + found_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?") + export PATH="$NODE_PREFIX/bin:$PATH" + log_success "Node.js $found_ver found (Hermes-managed at $NODE_PREFIX)" + return 0 + fi + + log_info "Installing Node.js $NODE_VERSION LTS into $NODE_PREFIX ..." 
+ + local index_url="https://nodejs.org/dist/latest-v${NODE_VERSION}.x/" + local tarball_name + tarball_name=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.xz" \ + | head -1) + + if [ -z "$tarball_name" ]; then + tarball_name=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.gz" \ + | head -1) + fi + + if [ -z "$tarball_name" ]; then + log_error "Could not locate Node.js $NODE_VERSION tarball for $NODE_OS-$NODE_ARCH" + log_info "Install Node 20+ manually: https://nodejs.org/en/download/" + return 1 + fi + + local tmp_dir + tmp_dir=$(mktemp -d) + trap 'rm -rf "$tmp_dir"' RETURN + + log_info "Downloading $tarball_name ..." + if ! curl -fsSL "${index_url}${tarball_name}" -o "$tmp_dir/$tarball_name"; then + log_error "Node.js download failed" + return 1 + fi + + if [[ "$tarball_name" == *.tar.xz ]]; then + tar xf "$tmp_dir/$tarball_name" -C "$tmp_dir" + else + tar xzf "$tmp_dir/$tarball_name" -C "$tmp_dir" + fi + + local extracted_dir + extracted_dir=$(ls -d "$tmp_dir"/node-v* 2>/dev/null | head -1) + if [ ! -d "$extracted_dir" ]; then + log_error "Node.js extraction failed" + return 1 + fi + + mkdir -p "$HERMES_HOME" + rm -rf "$NODE_PREFIX" + mv "$extracted_dir" "$NODE_PREFIX" + + export PATH="$NODE_PREFIX/bin:$PATH" + + local installed_ver + installed_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?") + log_success "Node.js $installed_ver installed to $NODE_PREFIX" +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 2: agent-browser + @askjo/camofox-browser via global npm install +# ───────────────────────────────────────────────────────────────────────── + +ensure_agent_browser() { + if ! 
command -v npm >/dev/null 2>&1; then + log_error "npm not on PATH after Node install — aborting" + return 1 + fi + + # _find_agent_browser() in tools/browser_tool.py walks ~/.hermes/node/bin + # plus a few standard prefixes, so installing globally into the managed + # Node prefix is enough — no PATH manipulation needed from the agent side. + if [ -x "$NODE_PREFIX/bin/agent-browser" ] || command -v agent-browser >/dev/null 2>&1; then + log_success "agent-browser already installed" + return 0 + fi + + # When the system's `npm` resolves to a root-owned prefix (e.g. + # /usr/lib/node_modules), `npm install -g` fails with EACCES without + # sudo. Force the prefix to the user-writable Hermes-managed Node + # directory so we never need sudo and the agent can always find the + # result. If we installed Node ourselves above, this is a no-op + # (managed Node already uses $NODE_PREFIX). If the user has system + # Node, we still drop agent-browser under $NODE_PREFIX/bin/ — which + # is exactly where _browser_candidate_path_dirs() looks first. + mkdir -p "$NODE_PREFIX" + + log_info "Installing agent-browser (npm, prefix=$NODE_PREFIX)..." + if ! npm install -g --prefix "$NODE_PREFIX" --silent \ + agent-browser@^0.26.0 \ + "@askjo/camofox-browser@^1.5.2"; then + log_error "npm install -g agent-browser failed" + return 1 + fi + + # macOS/Linux global installs place the shim into $NODE_PREFIX/bin/. + # Add it to PATH for any subsequent steps (npx playwright). + export PATH="$NODE_PREFIX/bin:$PATH" + + log_success "agent-browser installed to $NODE_PREFIX/bin/" +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 3: Playwright Chromium +# ───────────────────────────────────────────────────────────────────────── + +confirm_chromium_download() { + if [ "$ASSUME_YES" = true ]; then return 0; fi + if [ ! -t 0 ]; then + log_warn "Non-interactive shell — skipping Chromium prompt." + log_info "Re-run with --yes to install Chromium (~400 MB download)." 
+ return 1 + fi + printf "Install Playwright Chromium (~400 MB download)? [y/N] " + local reply="" + read -r reply || reply="" + case "$reply" in + y|Y|yes|YES) return 0 ;; + *) return 1 ;; + esac +} + +# Detect a usable system Chrome/Chromium. agent-browser's Chrome engine can +# use it instead of downloading Playwright's bundled Chromium, saving the +# download cost. Returns the path or empty string. +find_system_browser() { + local candidate + for candidate in google-chrome google-chrome-stable chromium chromium-browser chrome; do + if command -v "$candidate" >/dev/null 2>&1; then + command -v "$candidate" + return 0 + fi + done + # macOS app-bundle locations + if [ "$OS" = "macos" ]; then + for candidate in \ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + "/Applications/Chromium.app/Contents/MacOS/Chromium" ; do + if [ -x "$candidate" ]; then + echo "$candidate" + return 0 + fi + done + fi + return 1 +} + +write_browser_env() { + local browser_path="$1" + local env_file="$HERMES_HOME/.env" + mkdir -p "$HERMES_HOME" + if [ -f "$env_file" ] && grep -q "^AGENT_BROWSER_EXECUTABLE_PATH=" "$env_file"; then + return 0 + fi + { + echo "" + echo "# Hermes Agent browser tools — use the system Chrome/Chromium binary." + echo "AGENT_BROWSER_EXECUTABLE_PATH=$browser_path" + } >> "$env_file" + log_success "Configured browser tools to use $browser_path" +} + +ensure_chromium() { + if [ "$SKIP_CHROMIUM" = true ]; then + log_info "Skipping Chromium install (--skip-chromium)" + return 0 + fi + + local system_browser + system_browser="$(find_system_browser 2>/dev/null || true)" + if [ -n "$system_browser" ]; then + log_success "Found system browser: $system_browser" + log_info "Skipping Playwright Chromium download; agent-browser will use it." + write_browser_env "$system_browser" + return 0 + fi + + if ! confirm_chromium_download; then + log_info "Chromium install skipped. 
Browser tools will only work if you" + log_info "set AGENT_BROWSER_EXECUTABLE_PATH or install Chromium later." + return 0 + fi + + if ! command -v npx >/dev/null 2>&1; then + log_error "npx not on PATH — cannot install Playwright Chromium" + return 1 + fi + + log_info "Installing Playwright Chromium (~400 MB) ..." + + # On apt-based distros, --with-deps requires sudo. Try non-interactively + # only — never prompt — and fall back to the bare browser-only install. + local installed=false + if [ "$OS" = "linux" ]; then + case "$DISTRO" in + ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot) + if [ "$(id -u)" -eq 0 ] || (command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null); then + log_info "Installing system deps with --with-deps (sudo available)" + if npx --yes playwright install --with-deps chromium; then + installed=true + fi + else + log_warn "sudo not available non-interactively — installing Chromium without system deps." + log_info "If browser tools fail to launch, an administrator should run:" + log_info " sudo npx playwright install-deps chromium" + fi + ;; + arch|manjaro|cachyos|endeavouros|garuda) + log_info "Arch-family system dependencies are not auto-installed." + log_info "If launch fails, run: sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib" + ;; + fedora|rhel|centos|rocky|alma) + log_info "Fedora/RHEL system dependencies are not auto-installed." + log_info "If launch fails, run: sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib" + ;; + opensuse*|sles) + log_info "openSUSE system dependencies are not auto-installed." 
+ ;; + esac + fi + + if [ "$installed" = false ]; then + if npx --yes playwright install chromium; then + installed=true + fi + fi + + if [ "$installed" = true ]; then + log_success "Playwright Chromium installed" + else + log_error "Playwright Chromium install failed" + log_info "Try again later: npx --yes playwright install chromium" + return 1 + fi +} + +# ───────────────────────────────────────────────────────────────────────── +# Main +# ───────────────────────────────────────────────────────────────────────── + +main() { + log_info "Hermes Agent: bootstrapping browser tools" + log_info " HERMES_HOME = $HERMES_HOME" + log_info " OS / arch = $NODE_OS-$NODE_ARCH ${DISTRO:+($DISTRO)}" + + ensure_node + ensure_agent_browser + ensure_chromium + + log_success "Browser tools setup complete." + log_info "Hermes Agent will pick up agent-browser from $NODE_PREFIX/bin/ on next launch." +} + +main diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py index 48e677a6522..cf5c2ba9cfb 100644 --- a/acp_adapter/entry.py +++ b/acp_adapter/entry.py @@ -124,6 +124,20 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: action="store_true", help="Run interactive Hermes provider/model setup for ACP terminal auth", ) + parser.add_argument( + "--setup-browser", + action="store_true", + help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ " + "for browser tool support. Idempotent.", + ) + parser.add_argument( + "--yes", + "-y", + action="store_true", + dest="assume_yes", + help="Accept all prompts (currently used by --setup-browser to skip the " + "~400 MB Chromium download confirmation).", + ) return parser.parse_args(argv) @@ -150,6 +164,75 @@ def _run_setup() -> None: finally: sys.argv = old_argv + # Offer browser-tools install as a follow-up. The terminal auth method + # is the one supported first-run UX for registry installs, so this is + # the natural moment to ask. 
Skip silently if stdin isn't a TTY (the + # answer can't be collected anyway). + if not sys.stdin.isatty(): + return + try: + reply = input( + "\nInstall browser tools? Downloads agent-browser (npm) and " + "optionally Playwright Chromium (~400 MB). [y/N] " + ).strip().lower() + except (EOFError, KeyboardInterrupt): + return + if reply in {"y", "yes"}: + _run_setup_browser(assume_yes=False) + + +def _run_setup_browser(assume_yes: bool = False) -> int: + """Bootstrap agent-browser + Playwright Chromium for the registry-install path. + + Shells out to the bundled platform-specific bootstrap script + (acp_adapter/bootstrap/bootstrap_browser_tools.{sh,ps1}) so the install + logic lives in one place — readable, debuggable, and shareable with + install.sh / install.ps1 if we ever want to call it from there too. + + Returns the script's exit code (0 on success). + """ + import platform + import subprocess + + bootstrap_dir = Path(__file__).resolve().parent / "bootstrap" + + if platform.system() == "Windows": + script = bootstrap_dir / "bootstrap_browser_tools.ps1" + if not script.is_file(): + print( + f"Bootstrap script not found at {script} — wheel may be incomplete.", + file=sys.stderr, + ) + return 1 + cmd = [ + "powershell.exe", + "-NoProfile", + "-ExecutionPolicy", "Bypass", + "-File", str(script), + ] + if assume_yes: + cmd.append("-Yes") + else: + script = bootstrap_dir / "bootstrap_browser_tools.sh" + if not script.is_file(): + print( + f"Bootstrap script not found at {script} — wheel may be incomplete.", + file=sys.stderr, + ) + return 1 + cmd = ["bash", str(script)] + if assume_yes: + cmd.append("--yes") + + # stdio is inherited so the user sees the bootstrap's progress live. 
+ try: + result = subprocess.run(cmd, check=False) + except FileNotFoundError as exc: + # bash / powershell.exe not on PATH + print(f"Could not launch browser bootstrap: {exc}", file=sys.stderr) + return 1 + return result.returncode + def main(argv: list[str] | None = None) -> None: """Entry point: load env, configure logging, run the ACP agent.""" @@ -163,6 +246,11 @@ def main(argv: list[str] | None = None) -> None: if args.setup: _run_setup() return + if args.setup_browser: + rc = _run_setup_browser(assume_yes=args.assume_yes) + if rc != 0: + sys.exit(rc) + return _setup_logging() _load_env() diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 6b770edaf28..833172a23b9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -11715,6 +11715,20 @@ Examples: action="store_true", help="Run interactive Hermes provider/model setup for ACP terminal auth", ) + acp_parser.add_argument( + "--setup-browser", + action="store_true", + help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ " + "for browser tool support (idempotent).", + ) + acp_parser.add_argument( + "--yes", + "-y", + action="store_true", + dest="assume_yes", + help="Accept all prompts (used by --setup-browser to skip the " + "~400 MB Chromium download confirmation).", + ) def cmd_acp(args): """Launch Hermes Agent as an ACP server.""" @@ -11728,6 +11742,10 @@ Examples: acp_argv.append("--check") if getattr(args, "setup", False): acp_argv.append("--setup") + if getattr(args, "setup_browser", False): + acp_argv.append("--setup-browser") + if getattr(args, "assume_yes", False): + acp_argv.append("--yes") acp_main(acp_argv) except ImportError: print("ACP dependencies not installed.", file=sys.stderr) diff --git a/pyproject.toml b/pyproject.toml index 20fecac228e..ae2fff385a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -212,9 +212,10 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector [tool.setuptools.package-data] hermes_cli = ["web_dist/**/*"] 
gateway = ["assets/**/*"] +acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"] [tool.setuptools.packages.find] -include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "acp_adapter.*", "plugins", "plugins.*", "providers", "providers.*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/tests/acp/test_entry.py b/tests/acp/test_entry.py index 4c7e55f1d4b..81d30cd868c 100644 --- a/tests/acp/test_entry.py +++ b/tests/acp/test_entry.py @@ -1,6 +1,9 @@ """Tests for acp_adapter.entry startup wiring.""" +import sys + import acp +import pytest from acp_adapter import entry @@ -42,12 +45,152 @@ def test_main_setup_runs_model_configuration(monkeypatch): calls = {} def fake_hermes_main(): - import sys - calls["argv"] = sys.argv[:] monkeypatch.setattr("hermes_cli.main.main", fake_hermes_main) + # Pretend stdin is not a TTY so the follow-up browser prompt is skipped. + # That keeps this test focused on the model-setup wiring; the + # browser-prompt path has its own test below. 
+ monkeypatch.setattr("sys.stdin.isatty", lambda: False) entry.main(["--setup"]) assert calls["argv"][1:] == ["model"] + + +def test_main_setup_offers_browser_install_when_tty(monkeypatch): + """When stdin is a TTY and the user answers yes, model setup is followed + by a browser-tools bootstrap call.""" + monkeypatch.setattr("hermes_cli.main.main", lambda: None) + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "y") + + bootstrap_calls = [] + monkeypatch.setattr( + entry, + "_run_setup_browser", + lambda assume_yes=False: bootstrap_calls.append(assume_yes) or 0, + ) + + entry.main(["--setup"]) + + assert bootstrap_calls == [False] + + +def test_main_setup_skips_browser_prompt_on_no(monkeypatch): + monkeypatch.setattr("hermes_cli.main.main", lambda: None) + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "") + + called = [] + monkeypatch.setattr( + entry, + "_run_setup_browser", + lambda assume_yes=False: called.append(assume_yes) or 0, + ) + + entry.main(["--setup"]) + + assert called == [] + + +def test_main_setup_browser_invokes_bundled_script(monkeypatch): + """`hermes-acp --setup-browser` must shell out to the bundled bootstrap + script — never reimplement the install logic inline.""" + monkeypatch.setattr("platform.system", lambda: "Linux") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser"]) + + assert captured["cmd"][0] == "bash" + assert captured["cmd"][1].endswith("bootstrap_browser_tools.sh") + # --yes is NOT passed when the flag is absent. 
+ assert "--yes" not in captured["cmd"] + + +def test_main_setup_browser_forwards_yes_flag(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Linux") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser", "--yes"]) + + assert "--yes" in captured["cmd"] + + +def test_main_setup_browser_uses_powershell_on_windows(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Windows") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser", "--yes"]) + + assert captured["cmd"][0] == "powershell.exe" + assert any(part.endswith("bootstrap_browser_tools.ps1") for part in captured["cmd"]) + assert "-Yes" in captured["cmd"] + + +def test_main_setup_browser_propagates_failure(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Linux") + + class _R: + returncode = 7 + + monkeypatch.setattr("subprocess.run", lambda cmd, check=False: _R()) + + with pytest.raises(SystemExit) as excinfo: + entry.main(["--setup-browser"]) + assert excinfo.value.code == 7 + + +def test_bootstrap_scripts_ship_with_package(): + """The package-data wiring (pyproject.toml) must include the bootstrap + scripts — otherwise `--setup-browser` 404s at runtime.""" + from pathlib import Path + + bootstrap_dir = Path(entry.__file__).resolve().parent / "bootstrap" + sh = bootstrap_dir / "bootstrap_browser_tools.sh" + ps1 = bootstrap_dir / "bootstrap_browser_tools.ps1" + + assert sh.is_file(), f"missing bundled script: {sh}" + assert ps1.is_file(), f"missing bundled script: {ps1}" + + sh_text = sh.read_text(encoding="utf-8") + ps1_text = ps1.read_text(encoding="utf-8") + + # Sanity: scripts know how to find the Hermes-managed Node prefix. 
+ assert "HERMES_HOME" in sh_text + assert "agent-browser" in sh_text + assert "HermesHome" in ps1_text + assert "agent-browser" in ps1_text diff --git a/website/docs/user-guide/features/acp.md b/website/docs/user-guide/features/acp.md index 92a755c9ada..6540748c889 100644 --- a/website/docs/user-guide/features/acp.md +++ b/website/docs/user-guide/features/acp.md @@ -78,6 +78,27 @@ hermes acp --version hermes acp --check ``` +### Browser tools (optional) + +Browser tools (`browser_navigate`, `browser_click`, etc.) depend on the +`agent-browser` npm package and Chromium, which aren't part of the Python +wheel. Install them with: + +```bash +hermes acp --setup-browser # interactive (prompts before ~400 MB download) +hermes acp --setup-browser --yes # accept the download non-interactively +``` + +This is the standalone command. The Zed registry's terminal-auth flow (`hermes acp --setup`) also offers the browser bootstrap as a follow-up question after model selection, so most users never need to run `--setup-browser` directly. + +What it does: + +- Installs Node.js 22 LTS into `~/.hermes/node/` if missing +- `npm install -g agent-browser @askjo/camofox-browser` into that prefix (no sudo needed — `npm`'s `--prefix` points at the user-writable Hermes-managed Node) +- Installs Playwright Chromium, or uses a detected system Chrome/Chromium when available + +The bootstrap is idempotent — re-running it is fast and skips work that's already done. + ## Editor setup ### VS Code From 09d9724a09197b1981c318f3c51c55bc52fdfe29 Mon Sep 17 00:00:00 2001 From: Mibayy Date: Fri, 15 May 2026 01:33:49 -0700 Subject: [PATCH 022/218] feat(gateway): add SimpleX Chat platform plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SimpleX Chat (https://simplex.chat) is a private, decentralised messenger with no persistent user IDs — every contact is identified by an opaque internal ID generated at connection time. 
This adds it as a Hermes gateway platform via the plugin system. The adapter connects to a local simplex-chat daemon via WebSocket, listens for inbound messages, and sends replies. Originally proposed in PR #2558 as a core-modifying integration; reshaped here as a self-contained plugin under plugins/platforms/simplex/ with no edits to any core file. Discovery is filesystem-based (scanned by gateway.config), and the platform identity is resolved on demand via Platform("simplex"). Plugin contract: - check_requirements() requires SIMPLEX_WS_URL AND the websockets package - validate_config() / is_connected() accept env or config.yaml input - _env_enablement() seeds PlatformConfig.extra (ws_url + home_channel) - _standalone_send() supports out-of-process cron delivery - interactive_setup() provides a stdin wizard for hermes gateway setup - register() wires the adapter into the registry with required_env, install_hint, cron_deliver_env_var, allowed_users_env, and a platform_hint for the LLM. Lazy dependency: the websockets Python package is imported inside the functions that need it. The plugin is importable and discoverable even when websockets is missing — check_requirements() simply returns False until `pip install websockets` is run. No new pyproject extras are introduced. Environment variables: SIMPLEX_WS_URL WebSocket URL of the daemon (required) SIMPLEX_ALLOWED_USERS Comma-separated allowed contact IDs SIMPLEX_ALLOW_ALL_USERS Set true to allow all contacts SIMPLEX_HOME_CHANNEL Default contact for cron delivery SIMPLEX_HOME_CHANNEL_NAME Human label for the home channel Closes #2557. 
--- plugins/platforms/simplex/__init__.py | 3 + plugins/platforms/simplex/adapter.py | 746 +++++++++++++++++++ plugins/platforms/simplex/plugin.yaml | 37 + tests/gateway/test_simplex_plugin.py | 347 +++++++++ website/docs/user-guide/messaging/simplex.md | 99 +++ 5 files changed, 1232 insertions(+) create mode 100644 plugins/platforms/simplex/__init__.py create mode 100644 plugins/platforms/simplex/adapter.py create mode 100644 plugins/platforms/simplex/plugin.yaml create mode 100644 tests/gateway/test_simplex_plugin.py create mode 100644 website/docs/user-guide/messaging/simplex.md diff --git a/plugins/platforms/simplex/__init__.py b/plugins/platforms/simplex/__init__.py new file mode 100644 index 00000000000..d4f1d7bf0e3 --- /dev/null +++ b/plugins/platforms/simplex/__init__.py @@ -0,0 +1,3 @@ +from .adapter import register + +__all__ = ["register"] diff --git a/plugins/platforms/simplex/adapter.py b/plugins/platforms/simplex/adapter.py new file mode 100644 index 00000000000..b568f29bbb5 --- /dev/null +++ b/plugins/platforms/simplex/adapter.py @@ -0,0 +1,746 @@ +"""SimpleX Chat platform adapter (Hermes plugin). + +Connects to a simplex-chat daemon running in WebSocket mode. +Inbound messages arrive via a persistent WebSocket connection. +Outbound messages use the same WebSocket with JSON commands. + +This adapter ships as a Hermes platform plugin under +``plugins/platforms/simplex/``. The Hermes plugin loader scans the +directory at startup, calls ``register(ctx)``, and the platform +becomes available to ``gateway/run.py`` and ``tools/send_message_tool`` +through the registry — no edits to core files are required. 
+ +SimpleX chat daemon setup: + simplex-chat -p 5225 # start daemon on port 5225 + # or via Docker: + # docker run -p 5225:5225 simplexchat/simplex-chat-cli -p 5225 + +Required environment variables: + SIMPLEX_WS_URL WebSocket URL of the daemon + (default: ws://127.0.0.1:5225) + +Optional environment variables: + SIMPLEX_ALLOWED_USERS Comma-separated contact IDs (allowlist) + SIMPLEX_ALLOW_ALL_USERS Set 'true' to allow all contacts + SIMPLEX_HOME_CHANNEL Default contact/group ID for cron delivery + SIMPLEX_HOME_CHANNEL_NAME Human label for the home channel + +The ``websockets`` Python package is imported lazily — the plugin is +discoverable and `hermes setup` can describe it even when websockets is +not installed. ``check_requirements()`` returns False until the package +is present, so the gateway will not attempt to instantiate the adapter. +""" + +import asyncio +import json +import logging +import os +import random +import time +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +# Lazy import: BasePlatformAdapter and friends live in the main repo. +# Imported at module top because they're stdlib-only inside Hermes — no +# external dependency that would block the plugin from loading. 
+from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import ( + BasePlatformAdapter, + MessageEvent, + MessageType, + SendResult, + cache_image_from_bytes, + cache_audio_from_bytes, + cache_document_from_bytes, +) + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- +MAX_MESSAGE_LENGTH = 16_000 # SimpleX has no hard limit; keep chunking sane +TYPING_INTERVAL = 10.0 +WS_RETRY_DELAY_INITIAL = 2.0 +WS_RETRY_DELAY_MAX = 60.0 +HEALTH_CHECK_INTERVAL = 30.0 +HEALTH_CHECK_STALE_THRESHOLD = 120.0 + +# Correlation ID prefix for requests we send so we can ignore our own echoes. +_CORR_PREFIX = "hermes-" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _parse_comma_list(value: str) -> List[str]: + """Split a comma-separated string into a stripped list.""" + return [v.strip() for v in value.split(",") if v.strip()] + + +def _guess_extension(data: bytes) -> str: + """Guess file extension from magic bytes.""" + if data[:4] == b"\x89PNG": + return ".png" + if data[:2] == b"\xff\xd8": + return ".jpg" + if data[:4] == b"GIF8": + return ".gif" + if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP": + return ".webp" + if data[:4] == b"%PDF": + return ".pdf" + if len(data) >= 8 and data[4:8] == b"ftyp": + return ".mp4" + if data[:4] == b"OggS": + return ".ogg" + if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0: + return ".mp3" + return ".bin" + + +def _is_image_ext(ext: str) -> bool: + return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp") + + +def _is_audio_ext(ext: str) -> bool: + return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac") + + +# --------------------------------------------------------------------------- +# 
SimpleX Adapter +# --------------------------------------------------------------------------- + +class SimplexAdapter(BasePlatformAdapter): + """SimpleX Chat adapter using the simplex-chat daemon WebSocket API. + + Instantiated by the ``adapter_factory`` passed to + ``ctx.register_platform()`` in :func:`register`. + """ + + def __init__(self, config: PlatformConfig, **kwargs): + platform = Platform("simplex") + super().__init__(config=config, platform=platform) + + extra = getattr(config, "extra", {}) or {} + self.ws_url = extra.get("ws_url", "ws://127.0.0.1:5225").rstrip("/") + + # Running state + self._ws = None # websockets connection + self._ws_task: Optional[asyncio.Task] = None + self._health_task: Optional[asyncio.Task] = None + self._typing_tasks: Dict[str, asyncio.Task] = {} + self._running = False + self._last_ws_activity = 0.0 + + # Track sent correlation IDs to filter echoes + self._pending_corr_ids: set = set() + self._max_pending_corr = 200 + + logger.info("SimpleX adapter initialized: url=%s", self.ws_url) + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + async def connect(self) -> bool: + """Connect to the simplex-chat daemon and start the WebSocket listener.""" + try: + import websockets # noqa: F401 + except ImportError: + logger.error( + "SimpleX: 'websockets' package not installed. 
" + "Run: pip install websockets" + ) + return False + + if not self.ws_url: + logger.error("SimpleX: SIMPLEX_WS_URL is required") + return False + + # Quick connectivity check — try to open and immediately close + try: + import websockets as _wsclient + async with _wsclient.connect(self.ws_url, open_timeout=10): + pass + except Exception as e: + logger.error("SimpleX: cannot reach daemon at %s: %s", self.ws_url, e) + return False + + self._running = True + self._last_ws_activity = time.time() + self._ws_task = asyncio.create_task(self._ws_listener()) + self._health_task = asyncio.create_task(self._health_monitor()) + + logger.info("SimpleX: connected to %s", self.ws_url) + return True + + async def disconnect(self) -> None: + """Stop WebSocket listener and clean up.""" + self._running = False + + if self._ws_task: + self._ws_task.cancel() + try: + await self._ws_task + except asyncio.CancelledError: + pass + + if self._health_task: + self._health_task.cancel() + try: + await self._health_task + except asyncio.CancelledError: + pass + + for task in self._typing_tasks.values(): + task.cancel() + self._typing_tasks.clear() + + if self._ws: + try: + await self._ws.close() + except Exception: + pass + self._ws = None + + logger.info("SimpleX: disconnected") + + # ------------------------------------------------------------------ + # WebSocket listener + # ------------------------------------------------------------------ + + async def _ws_listener(self) -> None: + """Maintain a persistent WebSocket connection to the daemon.""" + import websockets as _wsclient + import websockets as _wsexc + + backoff = WS_RETRY_DELAY_INITIAL + + while self._running: + try: + logger.debug("SimpleX WS: connecting to %s", self.ws_url) + async with _wsclient.connect( + self.ws_url, + ping_interval=20, + ping_timeout=20, + ) as ws: + self._ws = ws + backoff = WS_RETRY_DELAY_INITIAL + self._last_ws_activity = time.time() + logger.info("SimpleX WS: connected") + + async for raw in ws: + if 
not self._running: + break + self._last_ws_activity = time.time() + try: + msg = json.loads(raw) + await self._handle_event(msg) + except json.JSONDecodeError: + logger.debug("SimpleX WS: invalid JSON: %.100s", raw) + except Exception: + logger.exception("SimpleX WS: error handling event") + + except asyncio.CancelledError: + break + except _wsexc.WebSocketException as e: + if self._running: + logger.warning( + "SimpleX WS: error: %s (reconnecting in %.0fs)", e, backoff + ) + except Exception as e: + if self._running: + logger.warning( + "SimpleX WS: unexpected error: %s (reconnecting in %.0fs)", + e, backoff, + ) + finally: + self._ws = None + + if self._running: + jitter = backoff * 0.2 * random.random() + await asyncio.sleep(backoff + jitter) + backoff = min(backoff * 2, WS_RETRY_DELAY_MAX) + + # ------------------------------------------------------------------ + # Health monitor + # ------------------------------------------------------------------ + + async def _health_monitor(self) -> None: + """Force reconnect if the WebSocket has been idle too long.""" + while self._running: + await asyncio.sleep(HEALTH_CHECK_INTERVAL) + if not self._running: + break + + elapsed = time.time() - self._last_ws_activity + if elapsed > HEALTH_CHECK_STALE_THRESHOLD: + logger.warning( + "SimpleX: WS idle for %.0fs, forcing reconnect", elapsed + ) + self._last_ws_activity = time.time() + if self._ws: + try: + await self._ws.close() + except Exception: + pass + + # ------------------------------------------------------------------ + # Inbound event handling + # ------------------------------------------------------------------ + + async def _handle_event(self, event: dict) -> None: + """Dispatch a daemon event to the appropriate handler.""" + resp_type = event.get("type") or event.get("resp", {}).get("type", "") + + # Filter responses to our own commands (echoes) + corr_id = event.get("corrId", "") + if corr_id and corr_id.startswith(_CORR_PREFIX): + 
self._pending_corr_ids.discard(corr_id) + return + + if resp_type == "newChatItem": + await self._handle_new_chat_item(event) + elif resp_type == "newChatItems": + # Batch variant — process each item + items = event.get("chatItems") or [] + for item_wrapper in items: + await self._handle_new_chat_item(item_wrapper) + # Ignore all other event types (delivery receipts, contact updates, etc.) + + async def _handle_new_chat_item(self, wrapper: dict) -> None: + """Process a single newChatItem event into a MessageEvent.""" + # The daemon wraps the chat item differently depending on version; + # normalise both layouts. + chat_info = wrapper.get("chatInfo") or wrapper.get("chat") or {} + chat_item = wrapper.get("chatItem") or wrapper.get("item") or {} + + # Only process messages (not calls, deleted items, etc.) + item_content = chat_item.get("content") or {} + msg_content = item_content.get("msgContent") or {} + if not msg_content: + return + + # Filter out messages sent by us (direction == "snd") + meta = chat_item.get("meta") or {} + direction = (meta.get("itemStatus") or {}).get("type", "") + if direction in ("sndSent", "sndSentDirect", "sndSentViaProxy", "sndNew"): + return + + # Determine chat type and IDs + chat_type_raw = chat_info.get("type", "") + is_group = chat_type_raw in ("group", "groupInfo") + + if is_group: + group_info = chat_info.get("groupInfo") or chat_info.get("group") or {} + group_id = str(group_info.get("groupId") or group_info.get("id") or "") + group_name = group_info.get("displayName") or group_info.get("groupProfile", {}).get("displayName", "") + chat_id = f"group:{group_id}" if group_id else "" + chat_name = group_name + else: + contact_info = chat_info.get("contact") or {} + contact_id = str(contact_info.get("contactId") or contact_info.get("id") or "") + contact_name = ( + contact_info.get("displayName") + or contact_info.get("localDisplayName") + or contact_id + ) + chat_id = contact_id + chat_name = contact_name + + if not chat_id: + 
logger.debug("SimpleX: ignoring event with no chat_id") + return + + # Sender — for groups the message includes a chatItemMember sub-object + member = chat_item.get("chatItemMember") or {} + if is_group and member: + sender_id = str(member.get("memberId") or member.get("id") or chat_id) + sender_name = ( + member.get("displayName") + or member.get("localDisplayName") + or sender_id + ) + else: + sender_id = chat_id + sender_name = chat_name + + # Extract text + text = msg_content.get("text") or "" + + # Media attachments + media_urls: List[str] = [] + media_types: List[str] = [] + file_info = chat_item.get("file") or {} + if file_info and file_info.get("fileStatus") not in ("cancelled", "error"): + file_id = file_info.get("fileId") + file_name = file_info.get("fileName", "file") + if file_id: + try: + cached = await self._fetch_file(file_id, file_name) + if cached: + ext = cached.rsplit(".", 1)[-1] + if _is_image_ext("." + ext): + media_types.append("image/" + ext.replace("jpg", "jpeg")) + elif _is_audio_ext("." 
async def _fetch_file(self, file_id: Any, file_name: str) -> Optional[str]:
    """Ask the daemon to receive a file attachment and cache its bytes.

    Sends ``/freceive <file_id>`` over the WebSocket, waits briefly, then
    looks for ``file_name`` in a fixed list of download directories.  The
    first match is read and stored through the image / audio / document
    cache helper chosen from the file's magic bytes.

    Args:
        file_id: Daemon-side identifier of the attachment.
        file_name: File name announced by the sender (untrusted).

    Returns:
        The cached path returned by the cache helper, or ``None`` when the
        name is unusable or the file was not found in any search directory.
    """
    # ``file_name`` is chosen by the remote peer.  Keep only its final path
    # component so a name such as ``../../etc/passwd`` or an absolute path
    # cannot make us read (and forward) a file outside the search dirs.
    safe_name = os.path.basename(str(file_name or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        logger.warning("SimpleX: refusing unsafe attachment name %r", file_name)
        return None

    # The WebSocket API has no direct binary download command; files are
    # stored on the local filesystem after the daemon accepts them.  We
    # request acceptance first, then read from the daemon's local path.
    corr_id = self._make_corr_id()
    cmd = {
        "corrId": corr_id,
        "cmd": f"/freceive {file_id}",
    }
    await self._send_ws(cmd)
    # The daemon will emit a chatItemUpdated event when the file lands;
    # for simplicity we just wait briefly and rely on the daemon's default path.
    await asyncio.sleep(2)

    # simplex-chat stores received files in ~/Downloads or a configured path.
    # We try common locations.
    for search_dir in (
        os.path.expanduser("~/Downloads"),
        os.path.expanduser("~/.simplex/files"),
        "/tmp/simplex_files",
    ):
        candidate = os.path.join(search_dir, safe_name)
        # isfile() rather than exists(): a directory with a matching name
        # must not be opened as if it were the attachment.
        if not os.path.isfile(candidate):
            continue
        with open(candidate, "rb") as f:
            data = f.read()
        ext = _guess_extension(data)
        if _is_image_ext(ext):
            return cache_image_from_bytes(data, ext)
        if _is_audio_ext(ext):
            return cache_audio_from_bytes(data, ext)
        return cache_document_from_bytes(data, safe_name)
    return None
async def get_chat_info(self, chat_id: str) -> dict:
    """Describe a chat using only its identifier.

    Identifiers starting with ``group:`` are reported as groups and named
    by the text after that prefix; anything else is reported as a direct
    message named by the identifier itself.
    """
    is_group = chat_id.startswith("group:")
    kind = "group" if is_group else "dm"
    label = chat_id[6:] if is_group else chat_id
    return {"chat_id": chat_id, "type": kind, "name": label}
def validate_config(config) -> bool:
    """Validate that the platform config has enough info to connect.

    A WebSocket URL may come from the ``SIMPLEX_WS_URL`` environment
    variable or from ``config.extra["ws_url"]``.  Candidates are stripped
    before being judged, so a whitespace-only value counts as missing —
    the same rule ``_env_enablement`` applies to the environment variable.

    Args:
        config: Platform config object; only its optional ``extra`` mapping
            is read.

    Returns:
        ``True`` when a non-blank URL is available from either source.
    """
    extra = getattr(config, "extra", {}) or {}
    for candidate in (os.getenv("SIMPLEX_WS_URL"), extra.get("ws_url")):
        if isinstance(candidate, str) and candidate.strip():
            return True
    return False
def interactive_setup() -> None:
    """Minimal stdin wizard for ``hermes setup gateway`` → SimpleX.

    Prompts for the WebSocket URL and the optional allowlist / home channel.
    Writes to ``~/.hermes/.env`` via ``hermes_cli.config``.

    Pressing Enter at the URL prompt when no value is stored yet saves the
    advertised default, so the platform is actually enabled afterwards.
    Ctrl-C / EOF stops the wizard instead of moving on to the next prompt.
    """
    default_ws_url = "ws://127.0.0.1:5225"

    print()
    print("SimpleX Chat setup")
    print("------------------")
    print("Requirements:")
    print(" 1. simplex-chat daemon running (e.g. `simplex-chat -p 5225`).")
    print(" 2. Python package `websockets` installed (`pip install websockets`).")
    print()

    try:
        from hermes_cli.config import get_env_value, save_env_value
    except ImportError:
        print("hermes_cli.config not available; set SIMPLEX_* vars manually in ~/.hermes/.env")
        return

    def _prompt(var: str, prompt: str, *, secret: bool = False, default: str = "") -> bool:
        """Ask for one value and persist it; return False if the user aborted."""
        existing = get_env_value(var)
        suffix = " [keep current]" if existing else ""
        try:
            if secret:
                import getpass
                value = getpass.getpass(f"{prompt}{suffix}: ")
            else:
                value = input(f"{prompt}{suffix}: ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return False
        # Blank input keeps an existing value; with nothing stored yet it
        # falls back to the default the prompt text promised (if any).
        if not value and not existing:
            value = default
        if value:
            save_env_value(var, value)
        return True

    questions = (
        ("SIMPLEX_WS_URL", "Daemon WebSocket URL (default ws://127.0.0.1:5225)", default_ws_url),
        ("SIMPLEX_ALLOWED_USERS", "Allowed contact IDs (comma-separated; blank=skip)", ""),
        ("SIMPLEX_HOME_CHANNEL", "Home channel contact/group ID (or empty)", ""),
    )
    for var, text, default in questions:
        if not _prompt(var, text, default=default):
            # Aborted: do not claim success or keep asking.
            return
    print("Done. Make sure the simplex-chat daemon is running before starting the gateway.")
Without this hook, deliver=simplex + # cron jobs fail with "No live adapter" when cron runs separately + # from the gateway. + standalone_sender_fn=_standalone_send, + # Auth env vars for _is_user_authorized() integration + allowed_users_env="SIMPLEX_ALLOWED_USERS", + allow_all_env="SIMPLEX_ALLOW_ALL_USERS", + # SimpleX has no hard line length; we still chunk for sanity. + max_message_length=MAX_MESSAGE_LENGTH, + # Display + emoji="🔒", + # SimpleX uses opaque contact IDs only — no phone numbers or + # email addresses to redact. + pii_safe=True, + allow_update_command=True, + # LLM guidance + platform_hint=( + "You are chatting via SimpleX Chat, a private decentralised " + "messenger. Contacts are identified by opaque internal IDs, " + "not phone numbers or usernames. SimpleX supports standard " + "markdown formatting. There is no typing indicator and no " + "hard message length limit, but keep responses conversational." + ), + ) diff --git a/plugins/platforms/simplex/plugin.yaml b/plugins/platforms/simplex/plugin.yaml new file mode 100644 index 00000000000..2bb87641b63 --- /dev/null +++ b/plugins/platforms/simplex/plugin.yaml @@ -0,0 +1,37 @@ +name: simplex-platform +label: SimpleX Chat +kind: platform +version: 1.0.0 +description: > + SimpleX Chat gateway adapter for Hermes Agent. + Connects to a local simplex-chat daemon via WebSocket and relays + messages between SimpleX contacts/groups and the Hermes agent. + SimpleX is decentralised and assigns no persistent user IDs — + every contact is an opaque internal ID generated at connection + time, making it one of the most private messengers available. +author: Mibayy +# ``requires_env`` and ``optional_env`` entries are surfaced in the +# ``hermes config`` UI via the platform-plugin env var injector in +# ``hermes_cli/config.py``. +requires_env: + - name: SIMPLEX_WS_URL + description: "WebSocket URL of the simplex-chat daemon (e.g. 
ws://127.0.0.1:5225)" + prompt: "SimpleX daemon WebSocket URL" + password: false +optional_env: + - name: SIMPLEX_ALLOWED_USERS + description: "Comma-separated SimpleX contact IDs allowed to talk to the bot" + prompt: "Allowed contact IDs (comma-separated)" + password: false + - name: SIMPLEX_ALLOW_ALL_USERS + description: "Allow any contact to talk to the bot (dev only — disables allowlist)" + prompt: "Allow all contacts? (true/false)" + password: false + - name: SIMPLEX_HOME_CHANNEL + description: "Default contact/group ID for cron / notification delivery" + prompt: "Home channel contact/group ID (or empty)" + password: false + - name: SIMPLEX_HOME_CHANNEL_NAME + description: "Human label for the home channel (defaults to the ID)" + prompt: "Home channel display name (or empty)" + password: false diff --git a/tests/gateway/test_simplex_plugin.py b/tests/gateway/test_simplex_plugin.py new file mode 100644 index 00000000000..0b1b1b21a85 --- /dev/null +++ b/tests/gateway/test_simplex_plugin.py @@ -0,0 +1,347 @@ +"""Tests for the SimpleX Chat platform-plugin adapter. + +Loaded via the ``_plugin_adapter_loader`` helper so this lives under +``plugin_adapter_simplex`` in ``sys.modules`` and cannot collide with +sibling platform-plugin tests on the same xdist worker. 
+""" + +from __future__ import annotations + +import json +import os +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from tests.gateway._plugin_adapter_loader import load_plugin_adapter + +_simplex = load_plugin_adapter("simplex") + +SimplexAdapter = _simplex.SimplexAdapter +check_requirements = _simplex.check_requirements +validate_config = _simplex.validate_config +is_connected = _simplex.is_connected +register = _simplex.register +_env_enablement = _simplex._env_enablement +_standalone_send = _simplex._standalone_send +_guess_extension = _simplex._guess_extension +_is_image_ext = _simplex._is_image_ext +_is_audio_ext = _simplex._is_audio_ext +_CORR_PREFIX = _simplex._CORR_PREFIX + + +# --------------------------------------------------------------------------- +# 1. Platform enum (plugin-discovered, not bundled) +# --------------------------------------------------------------------------- + +def test_platform_enum_resolves_via_plugin_scan(): + """The plugin filesystem scan should expose Platform("simplex").""" + from gateway.config import Platform + p = Platform("simplex") + assert p.value == "simplex" + # Identity stability — repeated lookups return the same pseudo-member + assert Platform("simplex") is p + + +# --------------------------------------------------------------------------- +# 2. check_requirements / validate_config / is_connected +# --------------------------------------------------------------------------- + +def test_check_requirements_needs_url(monkeypatch): + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + assert check_requirements() is False + + +def test_check_requirements_true_when_configured(monkeypatch): + monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225") + # websockets is a dev dep in this repo via the test plugins; the + # check_requirements() gate also asserts the package imports. 
def test_validate_config_uses_env_or_extra(monkeypatch):
    """validate_config accepts a URL from env or ``extra`` and rejects neither."""
    from gateway.config import PlatformConfig
    # Isolate from the caller's shell: an exported SIMPLEX_WS_URL would make
    # the "invalid" assertion below fail for reasons unrelated to the code.
    monkeypatch.delenv("SIMPLEX_WS_URL", raising=False)
    # Empty extra + no env → invalid
    cfg = PlatformConfig(enabled=True)
    assert validate_config(cfg) is False
    # extra-only path → valid
    cfg2 = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
    assert validate_config(cfg2) is True
    # env-only path → valid
    monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225")
    assert validate_config(PlatformConfig(enabled=True)) is True
monkeypatch.delenv("SIMPLEX_HOME_CHANNEL_NAME", raising=False) + seed = _env_enablement() + assert seed["home_channel"] == {"chat_id": "42", "name": "42"} + + +# --------------------------------------------------------------------------- +# 4. Adapter init +# --------------------------------------------------------------------------- + +def test_adapter_init_custom_url(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + assert adapter.ws_url == "ws://localhost:5225" + assert adapter._running is False + assert adapter._ws is None + + +def test_adapter_init_default_url(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True) + adapter = SimplexAdapter(cfg) + assert adapter.ws_url == "ws://127.0.0.1:5225" + + +def test_adapter_platform_identity(): + """Adapter should expose Platform("simplex") identity.""" + from gateway.config import Platform, PlatformConfig + cfg = PlatformConfig(enabled=True) + adapter = SimplexAdapter(cfg) + assert adapter.platform is Platform("simplex") + + +# --------------------------------------------------------------------------- +# 5. 
Helper functions (magic-byte detection) +# --------------------------------------------------------------------------- + +def test_guess_extension_png(): + assert _guess_extension(b"\x89PNG\r\n\x1a\n") == ".png" + + +def test_guess_extension_jpg(): + assert _guess_extension(b"\xff\xd8\xff\xe0") == ".jpg" + + +def test_guess_extension_ogg(): + assert _guess_extension(b"OggS\x00\x02") == ".ogg" + + +def test_guess_extension_unknown(): + assert _guess_extension(b"\x00\x01\x02\x03") == ".bin" + + +def test_is_image_ext(): + assert _is_image_ext(".png") is True + assert _is_image_ext(".webp") is True + assert _is_image_ext(".ogg") is False + + +def test_is_audio_ext(): + assert _is_audio_ext(".ogg") is True + assert _is_audio_ext(".mp3") is True + assert _is_audio_ext(".pdf") is False + + +# --------------------------------------------------------------------------- +# 6. Correlation IDs +# --------------------------------------------------------------------------- + +def test_corr_id_starts_with_prefix_and_tracks_pending(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + corr_id = adapter._make_corr_id() + assert corr_id.startswith(_CORR_PREFIX) + assert corr_id in adapter._pending_corr_ids + + +def test_corr_id_pending_set_self_trims(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + adapter._max_pending_corr = 4 + for _ in range(10): + adapter._make_corr_id() + # After many additions, the pending set should be bounded by the trim + # logic — at most one trim window above the cap. + assert len(adapter._pending_corr_ids) <= adapter._max_pending_corr + 1 + + +# --------------------------------------------------------------------------- +# 7. 
Outbound send (mocked WS) +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_send_dm(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + + mock_ws = AsyncMock() + adapter._ws = mock_ws + + result = await adapter.send("contact-42", "Hello, SimpleX!") + mock_ws.send.assert_called_once() + payload = json.loads(mock_ws.send.call_args[0][0]) + assert payload["cmd"] == "@[contact-42] Hello, SimpleX!" + assert payload["corrId"].startswith(_CORR_PREFIX) + assert result.success is True + + +@pytest.mark.asyncio +async def test_send_group(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + + mock_ws = AsyncMock() + adapter._ws = mock_ws + + result = await adapter.send("group:grp-99", "Hello, group!") + payload = json.loads(mock_ws.send.call_args[0][0]) + assert payload["cmd"] == "#[grp-99] Hello, group!" + assert result.success is True + + +@pytest.mark.asyncio +async def test_send_when_ws_not_connected_does_not_crash(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + # No _ws assigned — _send_ws should drop quietly + result = await adapter.send("contact-42", "hi") + assert result.success is True # send() always returns success — fire-and-forget + + +# --------------------------------------------------------------------------- +# 8. 
Inbound: filter own-echo by corrId prefix +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_handle_event_filters_own_corr_id(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + # Pretend we sent a command with this corrId + own = adapter._make_corr_id() + handler_mock = AsyncMock() + adapter._handle_new_chat_item = handler_mock # type: ignore + + await adapter._handle_event({"corrId": own, "type": "newChatItem"}) + handler_mock.assert_not_called() + assert own not in adapter._pending_corr_ids # discarded + + +# --------------------------------------------------------------------------- +# 9. Standalone (out-of-process) send for cron +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_standalone_send_missing_websockets(monkeypatch): + """When websockets is unimportable, return a clean error dict. + + Implementation detail: the standalone path does ``import websockets`` + inside the function body. We simulate the package being absent by + pulling it out of ``sys.modules`` and pointing the finder at None. 
+ """ + import sys + saved_websockets = sys.modules.pop("websockets", None) + saved_meta = list(sys.meta_path) + + class _Blocker: + @staticmethod + def find_spec(name, path=None, target=None): + if name == "websockets" or name.startswith("websockets."): + raise ImportError("websockets blocked for test") + return None + + sys.meta_path.insert(0, _Blocker()) + try: + pconfig = MagicMock() + pconfig.extra = {"ws_url": "ws://localhost:5225"} + result = await _standalone_send(pconfig, "contact-42", "hi") + assert isinstance(result, dict) + assert "error" in result + assert "websockets" in result["error"] + finally: + sys.meta_path[:] = saved_meta + if saved_websockets is not None: + sys.modules["websockets"] = saved_websockets + + +@pytest.mark.asyncio +async def test_standalone_send_missing_url(monkeypatch): + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + pconfig = MagicMock() + pconfig.extra = {} + # We expect the URL fallback (extra+env both empty) to be empty string, + # producing an error. We also need websockets to be importable for the + # url-check branch to be reached, so skip when it's not. + try: + import websockets.client # noqa: F401 + except ImportError: + pytest.skip("websockets not installed") + + result = await _standalone_send(pconfig, "contact-42", "hi") + assert isinstance(result, dict) + # Either error about URL or a connection attempt failure — both are valid + # signals that the standalone path requires configuration. + assert "error" in result + + +# --------------------------------------------------------------------------- +# 10. 
register() — plugin-side metadata +# --------------------------------------------------------------------------- + +def test_register_calls_register_platform(): + ctx = MagicMock() + register(ctx) + ctx.register_platform.assert_called_once() + kwargs = ctx.register_platform.call_args.kwargs + assert kwargs["name"] == "simplex" + assert kwargs["label"] == "SimpleX Chat" + assert kwargs["required_env"] == ["SIMPLEX_WS_URL"] + assert kwargs["allowed_users_env"] == "SIMPLEX_ALLOWED_USERS" + assert kwargs["allow_all_env"] == "SIMPLEX_ALLOW_ALL_USERS" + assert kwargs["cron_deliver_env_var"] == "SIMPLEX_HOME_CHANNEL" + assert callable(kwargs["check_fn"]) + assert callable(kwargs["validate_config"]) + assert callable(kwargs["is_connected"]) + assert callable(kwargs["env_enablement_fn"]) + assert callable(kwargs["standalone_sender_fn"]) + assert callable(kwargs["adapter_factory"]) + assert callable(kwargs["setup_fn"]) + # SimpleX uses opaque IDs only — no PII to redact. + assert kwargs["pii_safe"] is True diff --git a/website/docs/user-guide/messaging/simplex.md b/website/docs/user-guide/messaging/simplex.md new file mode 100644 index 00000000000..60853acd9f8 --- /dev/null +++ b/website/docs/user-guide/messaging/simplex.md @@ -0,0 +1,99 @@ +# SimpleX Chat + +[SimpleX Chat](https://simplex.chat/) is a private, decentralised messaging platform where users own their contacts and groups. Unlike other platforms, SimpleX assigns no persistent user IDs — every contact is identified by an opaque internal ID generated at connection time, which makes it one of the most private messengers available. 
+ +## Prerequisites + +- The **simplex-chat** CLI installed and running as a daemon +- Python package **websockets** (`pip install websockets`) + +## Install simplex-chat + +Download the latest release from the [simplex-chat GitHub releases](https://github.com/simplex-chat/simplex-chat/releases) page, or via Docker: + +```bash +# Linux binary (Ubuntu 22.04, x86-64) +curl -L https://github.com/simplex-chat/simplex-chat/releases/latest/download/simplex-chat-ubuntu-22_04-x86-64 -o simplex-chat +chmod +x simplex-chat + +# Or Docker +docker run -p 5225:5225 simplexchat/simplex-chat -p 5225 +``` + +## Start the daemon + +```bash +simplex-chat -p 5225 +``` + +The daemon listens on WebSocket at `ws://127.0.0.1:5225` by default. + +## Configure Hermes + +### Via setup wizard + +```bash +hermes setup gateway +``` + +Select **SimpleX Chat** and follow the prompts. + +### Via environment variables + +Add these to `~/.hermes/.env`: + +``` +SIMPLEX_WS_URL=ws://127.0.0.1:5225 +SIMPLEX_ALLOWED_USERS=<contact_id_1>,<contact_id_2> +SIMPLEX_HOME_CHANNEL=<contact_id> +``` + +| Variable | Required | Description | +|---|---|---| +| `SIMPLEX_WS_URL` | Yes | WebSocket URL of the simplex-chat daemon | +| `SIMPLEX_ALLOWED_USERS` | Recommended | Comma-separated contact IDs allowed to use the agent | +| `SIMPLEX_ALLOW_ALL_USERS` | Optional | Set `true` to allow every contact (use carefully) | +| `SIMPLEX_HOME_CHANNEL` | Optional | Default contact/group ID for cron job delivery | +| `SIMPLEX_HOME_CHANNEL_NAME` | Optional | Human label for the home channel | + +## Find your contact ID + +After starting the daemon, open a conversation with your agent contact. The contact ID will appear in session logs or via `hermes send_message action=list`. + +## Authorization + +By default **all contacts are denied**. You must either: + +1. Set `SIMPLEX_ALLOWED_USERS` to a comma-separated list of contact IDs, or +2. Use **DM pairing** — send any message to the bot and it will reply with a pairing code. Enter that code via `hermes gateway pair`. 
+ +## Using SimpleX with cron jobs + +```python +cronjob( + action="create", + schedule="every 1h", + deliver="simplex", # uses SIMPLEX_HOME_CHANNEL + prompt="Check for alerts and summarise." +) +``` + +Or target a specific contact: + +```python +send_message(target="simplex:<contact_id>", message="Done!") +``` + +## Privacy notes + +- SimpleX never reveals phone numbers or email addresses — contacts use opaque IDs +- The connection between Hermes and the daemon is local WebSocket (`ws://127.0.0.1:5225`) — no data leaves your machine +- Messages are end-to-end encrypted by the SimpleX protocol before reaching the daemon + +## Troubleshooting + +**"Cannot reach daemon"** — Ensure `simplex-chat -p 5225` is running and the port matches `SIMPLEX_WS_URL`. + +**"websockets not installed"** — Run `pip install websockets`. + +**Messages not received** — Check that the contact's ID is in `SIMPLEX_ALLOWED_USERS` or approve them via DM pairing. From 47614dbfca86afd9e6cf29dbd8aa4effda0932c9 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:33:59 -0700 Subject: [PATCH 023/218] chore: wire simplex docs into sidebar + AUTHOR_MAP - Adds plugins/platforms/simplex docs page to the messaging sidebar between LINE and Open WebUI. - Maps louismichalot@hotmail.com -> Mibayy in scripts/release.py so the attribution check on the salvage PR passes. 
--- scripts/release.py | 1 + website/sidebars.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index b0e1fda9686..7d761d4aa80 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -398,6 +398,7 @@ AUTHOR_MAP = { "Mibayy@users.noreply.github.com": "Mibayy", "mibayy@users.noreply.github.com": "Mibayy", "mibay@clawhub.io": "Mibayy", + "louismichalot@hotmail.com": "Mibayy", "135070653+sgaofen@users.noreply.github.com": "sgaofen", "lzy.dev@gmail.com": "zhiyanliu", "me@janstepanovsky.cz": "hhhonzik", diff --git a/website/sidebars.ts b/website/sidebars.ts index a2977c87eef..a8d893d6e72 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -145,6 +145,7 @@ const sidebars: SidebarsConfig = { 'user-guide/messaging/teams-meetings', 'user-guide/messaging/msgraph-webhook', 'user-guide/messaging/line', + 'user-guide/messaging/simplex', 'user-guide/messaging/open-webui', 'user-guide/messaging/webhooks', ], From b6e07417c5242f7a3d6af1c8d8f0173248b4253f Mon Sep 17 00:00:00 2001 From: Mibayy Date: Fri, 15 May 2026 01:39:13 -0700 Subject: [PATCH 024/218] feat(cli): show YOLO mode warning in banner and status bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running with --yolo, all dangerous command approvals are bypassed. Make this state visible so users don't forget: - Banner: '⚠ YOLO mode — all approval prompts bypassed' line in red, only shown when YOLO is active. Default case is silent (no extra line, no always-on 'restricted' label). - Status bar: '⚠ YOLO' fragment appended in red (#FF4444 bold) across all three width tiers (<52, <76, ≥76) in both the plain-text fallback and the fragments builder. 
Closes #2663 Co-authored-by: Mibayy --- cli.py | 22 ++++++++++++++++++++-- hermes_cli/banner.py | 3 +++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 527269aef7a..27286a3c988 100644 --- a/cli.py +++ b/cli.py @@ -3370,8 +3370,11 @@ class HermesCLI: percent_label = f"{percent}%" if percent is not None else "--" duration_label = snapshot["duration"] + yolo_active = bool(os.getenv("HERMES_YOLO_MODE")) if width < 52: text = f"⚕ {snapshot['model_short']} · {duration_label}" + if yolo_active: + text += " · ⚠ YOLO" return self._trim_status_bar_text(text, width) if width < 76: parts = [f"⚕ {snapshot['model_short']}", percent_label] @@ -3379,6 +3382,8 @@ class HermesCLI: if compressions: parts.append(f"🗜️ {compressions}") parts.append(duration_label) + if yolo_active: + parts.append("⚠ YOLO") return self._trim_status_bar_text(" · ".join(parts), width) if snapshot["context_length"]: @@ -3396,6 +3401,8 @@ class HermesCLI: prompt_elapsed = snapshot.get("prompt_elapsed") if prompt_elapsed: parts.append(prompt_elapsed) + if yolo_active: + parts.append("⚠ YOLO") return self._trim_status_bar_text(" │ ".join(parts), width) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -3412,6 +3419,7 @@ class HermesCLI: # line and produce duplicated status bar rows over long sessions. 
width = self._get_tui_terminal_width() duration_label = snapshot["duration"] + yolo_active = bool(os.getenv("HERMES_YOLO_MODE")) if width < 52: frags = [ @@ -3419,8 +3427,11 @@ class HermesCLI: ("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " · "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ] + if yolo_active: + frags.append(("class:status-bar-dim", " · ")) + frags.append(("class:status-bar-yolo", "⚠ YOLO")) + frags.append(("class:status-bar", " ")) else: percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" @@ -3438,8 +3449,11 @@ class HermesCLI: frags.extend([ ("class:status-bar-dim", " · "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ]) + if yolo_active: + frags.append(("class:status-bar-dim", " · ")) + frags.append(("class:status-bar-yolo", "⚠ YOLO")) + frags.append(("class:status-bar", " ")) else: if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -3472,6 +3486,9 @@ class HermesCLI: if prompt_elapsed: frags.append(("class:status-bar-dim", " │ ")) frags.append(("class:status-bar-dim", prompt_elapsed)) + if yolo_active: + frags.append(("class:status-bar-dim", " │ ")) + frags.append(("class:status-bar-yolo", "⚠ YOLO")) frags.append(("class:status-bar", " ")) total_width = sum(self._status_bar_display_width(text) for _, text in frags) @@ -13344,6 +13361,7 @@ class HermesCLI: 'status-bar-warn': 'bg:#1a1a2e #FFD700 bold', 'status-bar-bad': 'bg:#1a1a2e #FF8C00 bold', 'status-bar-critical': 'bg:#1a1a2e #FF6B6B bold', + 'status-bar-yolo': 'bg:#1a1a2e #FF4444 bold', # Bronze horizontal rules around the input area 'input-rule': '#CD7F32', # Clipboard image attachment badges diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index c4ec348ef48..036412ac072 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -470,6 +470,9 @@ def build_welcome_banner(console: Console, model: str, 
cwd: str, model_short = model_short[:25] + "..." ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else "" left_lines.append(f"[{accent}]{model_short}[/]{ctx_str} [dim {dim}]·[/] [dim {dim}]Nous Research[/]") + + if os.getenv("HERMES_YOLO_MODE"): + left_lines.append(f"[bold red]⚠ YOLO mode[/] [dim {dim}]— all approval prompts bypassed[/]") left_lines.append(f"[dim {dim}]{cwd}[/]") if session_id: left_lines.append(f"[dim {session_color}]Session: {session_id}[/]") From 4f8aaf10465566008499e65937f659a29f1ba6ab Mon Sep 17 00:00:00 2001 From: InB4DevOps Date: Fri, 15 May 2026 01:40:03 -0700 Subject: [PATCH 025/218] perf(run_agent): accumulate length-continuation prefix via list+join MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace O(n²) string concatenation of truncated_response_prefix in the length-continuation retry loop with a list + ''.join(). Functionally equivalent: same partial response on early return, same prepend on final assembly. The legacy retry path is capped at 3 iterations, so the practical wall-clock win is small, but the new idiom matches the rest of the codebase and removes a needless repeated allocation. Salvaged from PR #2717 (the run_conversation portion only — trajectory refactor dropped because it silently rewrote to ). 
Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- run_agent.py | 12 ++++++------ .../test_anthropic_truncation_continuation.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 325e1e13ef3..18ca03bd512 100644 --- a/run_agent.py +++ b/run_agent.py @@ -12207,7 +12207,7 @@ class AIAgent: codex_ack_continuations = 0 length_continue_retries = 0 truncated_tool_call_retries = 0 - truncated_response_prefix = "" + truncated_response_parts: List[str] = [] compression_attempts = 0 _turn_exit_reason = "unknown" # Diagnostic: why the loop ended @@ -13100,7 +13100,7 @@ class AIAgent: interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) if assistant_message.content: - truncated_response_prefix += assistant_message.content + truncated_response_parts.append(assistant_message.content) if length_continue_retries < 3: self._vprint( @@ -13121,7 +13121,7 @@ class AIAgent: restart_with_length_continuation = True break - partial_response = self._strip_think_blocks(truncated_response_prefix).strip() + partial_response = self._strip_think_blocks("".join(truncated_response_parts)).strip() self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) return { @@ -15325,9 +15325,9 @@ class AIAgent: codex_ack_continuations = 0 - if truncated_response_prefix: - final_response = truncated_response_prefix + final_response - truncated_response_prefix = "" + if truncated_response_parts: + final_response = "".join(truncated_response_parts) + final_response + truncated_response_parts = [] length_continue_retries = 0 final_response = self._strip_think_blocks(final_response).strip() diff --git a/tests/run_agent/test_anthropic_truncation_continuation.py b/tests/run_agent/test_anthropic_truncation_continuation.py index b7a263f1649..872015bc0bc 100644 --- a/tests/run_agent/test_anthropic_truncation_continuation.py +++ 
b/tests/run_agent/test_anthropic_truncation_continuation.py @@ -59,7 +59,7 @@ class TestTruncatedAnthropicResponseNormalization: nr = get_transport("anthropic_messages").normalize_response(response) # The continuation block checks these two attributes: - # assistant_message.content → appended to truncated_response_prefix + # assistant_message.content → appended to truncated_response_parts # assistant_message.tool_calls → guards the text-retry branch assert nr.content is not None assert "partial response" in nr.content From 647cc0bb0db4328b941008b290dcb986cdd18c54 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:40:07 -0700 Subject: [PATCH 026/218] chore(release): add AUTHOR_MAP entries for InB4DevOps --- scripts/release.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 7d761d4aa80..8d2c6c16990 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -89,6 +89,8 @@ AUTHOR_MAP = { "zhanganzhe@tenclass.com": "luoyuctl", "51604064+luoyuctl@users.noreply.github.com": "luoyuctl", "127238744+teknium1@users.noreply.github.com": "teknium1", + "tolle.lege+github@gmail.com": "InB4DevOps", + "73686890+InB4DevOps@users.noreply.github.com": "InB4DevOps", "147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0", "97489706+purzbeats@users.noreply.github.com": "purzbeats", "hugosequier@gmail.com": "Hugo-SEQUIER", From 5360b542447daaf0ba8d0f7c3cf0be1751ca0008 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:38:30 -0700 Subject: [PATCH 027/218] fix(providers): set User-Agent on ProviderProfile.fetch_models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some catalog endpoints (OpenCode Zen, etc.) sit behind a WAF that returns 403 for the default Python-urllib/ User-Agent. 
The generic profile-based live fetch in providers/base.py was silently failing for any such provider — falling through to the static catalog and missing newly-launched models. Set a generic 'hermes-cli/' UA on the catalog probe so every api_key provider profile benefits. Verified live against opencode-zen: before this change, profile.fetch_models() raised HTTP 403; after, it returns 42 models including gpt-5.5, gpt-5.5-pro, kimi-k2.6, glm-5.1 and the *-free variants the static catalog doesn't list. Also strip the now-stale comment in validate_requested_model() claiming opencode-zen's /models returns 404 against the HTML marketing site — the API endpoint at /zen/v1/models returns 200 with valid JSON. Surfaced by #2651 (@aashizpoudel) — fixes the same user-facing gap their PR targeted, applied at the right layer so all api_key provider profiles get live catalogs through the same code path. Co-authored-by: Aashish Poudel --- hermes_cli/models.py | 13 ++++++------- providers/base.py | 18 ++++++++++++++++++ scripts/release.py | 2 ++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 1ffede636a1..bc41132f5d5 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3702,13 +3702,12 @@ def validate_requested_model( # Static-catalog fallback: when the /models probe was unreachable, # validate against the curated list from provider_model_ids() — same - # pattern as the openai-codex and minimax branches above. This fixes - # /model switches in the gateway for providers like opencode-go and - # opencode-zen whose /models endpoint returns 404 against the HTML - # marketing site. Without this block, validate_requested_model would - # reject every model on such providers, switch_model() would return - # success=False, and the gateway would never write to - # _session_model_overrides. + # pattern as the openai-codex and minimax branches above. 
This keeps + # /model switches working in the gateway for providers whose /models + # endpoint is temporarily unreachable or returns a non-JSON payload. + # Without this block, validate_requested_model would reject every model + # on such providers, switch_model() would return success=False, and + # the gateway would never write to _session_model_overrides. provider_label = _PROVIDER_LABELS.get(normalized, normalized) try: catalog_models = provider_model_ids(normalized) diff --git a/providers/base.py b/providers/base.py index a9e76823bb2..fa6765d103c 100644 --- a/providers/base.py +++ b/providers/base.py @@ -21,6 +21,20 @@ logger = logging.getLogger(__name__) OMIT_TEMPERATURE = object() +def _profile_user_agent() -> str: + """Return a ``hermes-cli/`` UA string, with a stable fallback. + + Used by ``ProviderProfile.fetch_models`` so the catalog probe is not + served the default ``Python-urllib/`` UA — some providers + (OpenCode Zen, etc.) sit behind a WAF that returns 403 for that. + """ + try: + from hermes_cli import __version__ as _ver # lazy: avoid layer cycle at import time + return f"hermes-cli/{_ver}" + except Exception: + return "hermes-cli" + + @dataclass class ProviderProfile: """Base provider profile — subclass or instantiate with overrides.""" @@ -153,6 +167,10 @@ class ProviderProfile: if api_key: req.add_header("Authorization", f"Bearer {api_key}") req.add_header("Accept", "application/json") + # Some providers (e.g. OpenCode Zen) sit behind a WAF that blocks + # the default ``Python-urllib/`` User-Agent. Set a generic + # hermes-cli UA so the catalog endpoint is reachable. 
+ req.add_header("User-Agent", _profile_user_agent()) for k, v in self.default_headers.items(): req.add_header(k, v) diff --git a/scripts/release.py b/scripts/release.py index 8d2c6c16990..21587212b02 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -58,6 +58,8 @@ AUTHOR_MAP = { "altriatree@gmail.com": "TruaShamu", "m@mobrienv.dev": "mikeyobrien", "qiyin.zuo@pcitc.com": "qiyin-code", + "mr.aashiz@gmail.com": "aashizpoudel", + "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel", "oleksii.lisikh@gmail.com": "olisikh", "jeremy@geocaching.com": "outdoorsea", "leone.parise@gmail.com": "leoneparise", From 55f3262e788bdd7dd6adcab1d515d476b6cb9321 Mon Sep 17 00:00:00 2001 From: Animesh Mishra Date: Tue, 24 Mar 2026 07:20:51 +0000 Subject: [PATCH 028/218] fix(mcp): pre-compile env-var regex and unify interpolation Remove redundant inner `import re` and regex recompilation on every call in _interpolate_env_vars. Add module-level _ENV_VAR_PATTERN compiled once. Replace the separate _interpolate_value() in mcp_config.py (which used \w+ and would silently fail on env vars containing hyphens or dots) with the shared _ENV_VAR_PATTERN from mcp_tool.py. Remove now-unused import re. 
--- hermes_cli/mcp_config.py | 10 ++-------- tools/mcp_tool.py | 7 ++++++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/hermes_cli/mcp_config.py b/hermes_cli/mcp_config.py index 8c12ad70758..ed9d7b5f6db 100644 --- a/hermes_cli/mcp_config.py +++ b/hermes_cli/mcp_config.py @@ -25,6 +25,7 @@ from hermes_cli.config import ( ) from hermes_cli.colors import Colors, color from hermes_constants import display_hermes_home +from tools.mcp_tool import _ENV_VAR_PATTERN logger = logging.getLogger(__name__) @@ -551,7 +552,7 @@ def cmd_mcp_test(args): for k, v in headers.items(): if isinstance(v, str) and ("key" in k.lower() or "auth" in k.lower()): # Mask the value - resolved = _interpolate_value(v) + resolved = _ENV_VAR_PATTERN.sub(lambda m: os.getenv(m.group(1), ""), v) if len(resolved) > 8: masked = resolved[:4] + "***" + resolved[-4:] else: @@ -581,13 +582,6 @@ def cmd_mcp_test(args): print() -def _interpolate_value(value: str) -> str: - """Resolve ``${ENV_VAR}`` references in a string.""" - def _replace(m): - return os.getenv(m.group(1), "") - return re.sub(r"\$\{(\w+)\}", _replace, value) - - # ─── hermes mcp login ──────────────────────────────────────────────────────── def cmd_mcp_login(args): diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index ee1843043dc..c2668395e5d 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -279,6 +279,11 @@ _CREDENTIAL_PATTERN = re.compile( re.IGNORECASE, ) +# Pre-compiled pattern for ${VAR_NAME} style env-var interpolation. +# Supports any non-} characters in the variable name (hyphens, dots, etc.) +# so providers like MY-VAR or my.var work correctly. 
+_ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}") + # --------------------------------------------------------------------------- # Security helpers @@ -2104,7 +2109,7 @@ def _interpolate_env_vars(value): if isinstance(value, str): def _replace(m): return os.environ.get(m.group(1), m.group(0)) - return re.sub(r"\$\{([^}]+)\}", _replace, value) + return _ENV_VAR_PATTERN.sub(_replace, value) if isinstance(value, dict): return {k: _interpolate_env_vars(v) for k, v in value.items()} if isinstance(value, list): From 59c7cc64f0265195fa15a400411f381dd20b8b4e Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:42:35 -0700 Subject: [PATCH 029/218] chore(release): add AUTHOR_MAP entry for amethystani --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 21587212b02..38392742d43 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -115,6 +115,7 @@ AUTHOR_MAP = { "oswaldb22@users.noreply.github.com": "oswaldb22", "abdielv@proton.me": "AJV20", "mason@growagainorchids.com": "masonjames", + "108541149+amethystani@users.noreply.github.com": "amethystani", "ytchen0719@gmail.com": "liquidchen", "am@studio1.tailb672fe.ts.net": "subtract0", "mike@grossmann.at": "ReqX", From c4a21d783131b04da443be6b624e20bb3b5b87b7 Mon Sep 17 00:00:00 2001 From: nidhi-singh02 Date: Tue, 24 Mar 2026 14:42:16 +0530 Subject: [PATCH 030/218] fix(cli): log swallowed exception in runtime model auto-detection Replaces bare `except Exception: pass` with debug-level logging so failures in local endpoint model discovery are diagnosable instead of silently hidden. 
--- hermes_cli/runtime_provider.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 4ac21ea4568..d7c30fe5648 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -102,8 +102,10 @@ def _auto_detect_local_model(base_url: str) -> str: model_id = models[0].get("id", "") if model_id: return model_id - except Exception: - pass + except Exception as exc: + # Log instead of silently swallowing — aids debugging when + # local model auto-detection fails unexpectedly. + logger.debug("Auto-detect model from %s failed: %s", base_url, exc) return "" From 5301cc212bb72b634fcb4da7bf4380c43d4b3dca Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:46:46 -0700 Subject: [PATCH 031/218] chore(release): add AUTHOR_MAP entry for nidhi-singh02 --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 38392742d43..7606d058677 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -59,6 +59,7 @@ AUTHOR_MAP = { "m@mobrienv.dev": "mikeyobrien", "qiyin.zuo@pcitc.com": "qiyin-code", "mr.aashiz@gmail.com": "aashizpoudel", + "nidhi2894@gmail.com": "nidhi-singh02", "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel", "oleksii.lisikh@gmail.com": "olisikh", "jeremy@geocaching.com": "outdoorsea", From eacb398f755b6ee102e75c6d62aed5a9b253e29d Mon Sep 17 00:00:00 2001 From: Nidhi Singh Date: Fri, 15 May 2026 01:49:35 -0700 Subject: [PATCH 032/218] fix(tools): add return_exceptions to asyncio.gather in web_tools Three asyncio.gather() calls in tools/web_tools.py ran without return_exceptions=True. A single failing task (e.g. LLM rate limit on one URL) would raise out of gather() and discard every other successfully fetched/summarized result. Pass return_exceptions=True and filter BaseException entries with a warning log before unpacking. 
Affects: - chunk summarization gather (large web_extract pages) - firecrawl per-result LLM post-processing - tavily crawl per-result LLM post-processing Closes #2744 --- tools/web_tools.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/web_tools.py b/tools/web_tools.py index e2743248d22..597edb0c8fd 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -586,11 +586,20 @@ async def _process_large_content_chunked( # Run all chunk summarizations in parallel tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)] - results = await asyncio.gather(*tasks) - - # Collect successful summaries in order + # Use return_exceptions=True so a single task failure does not discard + # all other successfully summarized chunks. + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Filter out exceptions, then collect successful summaries in order + successful_results = [] + for result_item in results: + if isinstance(result_item, BaseException): + logger.warning("Chunk summarization task failed: %s", result_item) + continue + successful_results.append(result_item) + summaries = [] - for chunk_idx, summary in sorted(results, key=lambda x: x[0]): + for chunk_idx, summary in sorted(successful_results, key=lambda x: x[0]): if summary: summaries.append(f"## Section {chunk_idx + 1}\n{summary}") @@ -1038,10 +1047,16 @@ async def web_extract_tool( # Run all LLM processing in parallel results_list = response.get('results', []) tasks = [process_single_result(result) for result in results_list] - processed_results = await asyncio.gather(*tasks) - + # Use return_exceptions=True so a single task failure does not + # discard all other successfully processed results. 
+ processed_results = await asyncio.gather(*tasks, return_exceptions=True) + # Collect metrics and print results - for result, metrics, status in processed_results: + for result_item in processed_results: + if isinstance(result_item, BaseException): + logger.warning("Web result processing task failed: %s", result_item) + continue + result, metrics, status = result_item url = result.get('url', 'Unknown URL') if status == "processed": debug_call_data["compression_metrics"].append(metrics) @@ -1285,8 +1300,14 @@ async def web_crawl_tool( return result, metrics, "too_short" tasks = [_process_tavily_crawl(r) for r in response.get('results', [])] - processed_results = await asyncio.gather(*tasks) - for result, metrics, status in processed_results: + # Use return_exceptions=True so a single task failure does not + # discard all other successfully processed crawl results. + processed_results = await asyncio.gather(*tasks, return_exceptions=True) + for result_item in processed_results: + if isinstance(result_item, BaseException): + logger.warning("Tavily crawl processing task failed: %s", result_item) + continue + result, metrics, status = result_item if status == "processed": debug_call_data["compression_metrics"].append(metrics) debug_call_data["pages_processed_with_llm"] += 1 From 94bdc63ff5f5329e5f2ab0ea213c07e3a7643aff Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 01:49:40 -0700 Subject: [PATCH 033/218] chore(release): add AUTHOR_MAP entry for nidhi-singh02 PR #2751 salvage. CI requires AUTHOR_MAP coverage for all contributor commit emails. 
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 7606d058677..4a91762ebeb 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -223,6 +223,7 @@ AUTHOR_MAP = { "74749461+yuga-hashimoto@users.noreply.github.com": "yuga-hashimoto", "xiangyong@zspace.cn": "CES4751", "harish.kukreja@gmail.com": "counterposition", + "nidhi2894@gmail.com": "nidhi-singh02", "35294173+Fearvox@users.noreply.github.com": "Fearvox", "hypnus.yuan@gmail.com": "Hypnus-Yuan", "15558128926@qq.com": "xsfX20", From 837395685099b130a502db3ec25551475fe3c7cc Mon Sep 17 00:00:00 2001 From: nidhi-singh02 Date: Fri, 15 May 2026 01:49:56 -0700 Subject: [PATCH 034/218] fix(slack): guard split()[0] against whitespace-only command text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user sends a Slack message like '/hermes ' (trailing whitespace after the slash) the legacy subcommand router hit `text.split()[0]` with a truthy-but-whitespace-only `text`. `' '.split()` returns `[]` → IndexError, blowing up the slash handler before fallthrough to `/help`. Switch to a two-step guard that materializes the parts list first and indexes only if non-empty. Salvaged from PR #2752 by @nidhi-singh02. The PR's other two hunks (`tools/file_operations.py`, `agent/anthropic_adapter.py`) are unreachable in current code — `LINTERS` is a hardcoded constant dict with no empty values, and the anthropic version-detection site is already guarded by a `result.stdout.strip()` truthy check — so only the slack hunk is taken. 
Closes #2745 Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- gateway/platforms/slack.py | 5 ++++- scripts/release.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index ca34ab4acac..2116b569f96 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -2785,7 +2785,10 @@ class SlackAdapter(BasePlatformAdapter): from hermes_cli.commands import slack_subcommand_map subcommand_map = slack_subcommand_map() subcommand_map["compact"] = "/compress" - first_word = text.split()[0] if text else "" + # Guard against whitespace-only text where ``text`` is truthy but + # ``text.split()`` returns ``[]`` (e.g. user sends ``/hermes ``). + parts = text.split() if text else [] + first_word = parts[0] if parts else "" if first_word in subcommand_map: rest = text[len(first_word):].strip() text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word] diff --git a/scripts/release.py b/scripts/release.py index 4a91762ebeb..8a6f30802be 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1071,6 +1071,8 @@ AUTHOR_MAP = { "37467487+yifengingit@users.noreply.github.com": "yifengingit", # PR #25589 salvage (AUTOINCREMENT id ordering) "89525629+vanthinh6886@users.noreply.github.com": "vanthinh6886", # PR #25562 salvage (.env 0600 perms) "16034932+Arkmusn@users.noreply.github.com": "Arkmusn", # PR #25559 salvage (approvals.timeout from config) + "nidhi2894@gmail.com": "nidhi-singh02", # PR #2752 salvage (slack whitespace-only IndexError guard) + "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02", } From 6af99423272ed67dd1f8d88bfdf762d4e5b77a2f Mon Sep 17 00:00:00 2001 From: aydnOktay Date: Tue, 24 Mar 2026 13:45:33 +0300 Subject: [PATCH 035/218] fix(url-safety): allow only http and https schemes --- tests/tools/test_url_safety.py | 8 ++++++++ tools/url_safety.py | 3 +++ 2 files changed, 11 insertions(+) diff --git 
a/tests/tools/test_url_safety.py b/tests/tools/test_url_safety.py index 38d27d40af3..5a0cceb2880 100644 --- a/tests/tools/test_url_safety.py +++ b/tests/tools/test_url_safety.py @@ -22,6 +22,14 @@ class TestIsSafeUrl: ]): assert is_safe_url("https://example.com/image.png") is True + def test_ftp_scheme_blocked(self): + """Only http/https should be allowed for fetch tools.""" + assert is_safe_url("ftp://example.com/file.txt") is False + + def test_missing_scheme_blocked(self): + """Bare host/path should be rejected to avoid ambiguous handling.""" + assert is_safe_url("example.com/path") is False + def test_localhost_blocked(self): with patch("socket.getaddrinfo", return_value=[ (2, 1, 6, "", ("127.0.0.1", 0)), diff --git a/tools/url_safety.py b/tools/url_safety.py index 743510b2757..0f3dd597e49 100644 --- a/tools/url_safety.py +++ b/tools/url_safety.py @@ -263,6 +263,9 @@ def is_safe_url(url: str) -> bool: parsed = urlparse(url) hostname = (parsed.hostname or "").strip().lower().rstrip(".") scheme = (parsed.scheme or "").strip().lower() + if scheme not in {"http", "https"}: + logger.warning("Blocked request — unsupported URL scheme: %s", scheme or "") + return False if not hostname: return False From 13c72fb486e6bfc047bfde93e54116ea7ef7adf4 Mon Sep 17 00:00:00 2001 From: nidhi-singh02 Date: Fri, 15 May 2026 01:51:41 -0700 Subject: [PATCH 036/218] fix(tools): wrap browser provider network calls with error handling Wrap requests.post() in create_session() for browser_use, browserbase, and firecrawl providers with requests.RequestException handling. Connection timeouts and DNS resolution failures now surface as clean RuntimeError messages instead of raw requests exception tracebacks. Browser Use managed-gateway mode preserves raw exception propagation so the existing idempotency-key retry semantics keep working. 
Closes #2746 Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com> --- tools/browser_providers/browser_use.py | 22 ++++++-- tools/browser_providers/browserbase.py | 77 ++++++++++++++------------ tools/browser_providers/firecrawl.py | 17 ++++-- 3 files changed, 68 insertions(+), 48 deletions(-) diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py index 260249ef0bb..a1f4f425ba0 100644 --- a/tools/browser_providers/browser_use.py +++ b/tools/browser_providers/browser_use.py @@ -137,12 +137,22 @@ class BrowserUseProvider(CloudBrowserProvider): else {} ) - response = requests.post( - f"{config['base_url']}/browsers", - headers=headers, - json=payload, - timeout=30, - ) + try: + response = requests.post( + f"{config['base_url']}/browsers", + headers=headers, + json=payload, + timeout=30, + ) + except requests.RequestException as exc: + # Managed mode: propagate raw so callers can retry with the + # preserved idempotency key. Direct mode: wrap network failures + # into a clean RuntimeError for end users. 
+ if managed_mode: + raise + raise RuntimeError( + f"Browser Use API connection failed: {exc}" + ) from exc if not response.ok: if managed_mode and not _should_preserve_pending_create_key(response): diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 5076af4c7a6..4807345214b 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -92,45 +92,50 @@ class BrowserbaseProvider(CloudBrowserProvider): "X-BB-API-Key": config["api_key"], } - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) + try: + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) - proxies_fallback = False - keepalive_fallback = False + proxies_fallback = False + keepalive_fallback = False - # Handle 402 — paid features unavailable - if response.status_code == 402: - if enable_keep_alive: - keepalive_fallback = True - logger.warning( - "keepAlive may require paid plan (402), retrying without it. " - "Sessions may timeout during long operations." - ) - session_config.pop("keepAlive", None) - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) + # Handle 402 — paid features unavailable + if response.status_code == 402: + if enable_keep_alive: + keepalive_fallback = True + logger.warning( + "keepAlive may require paid plan (402), retrying without it. " + "Sessions may timeout during long operations." + ) + session_config.pop("keepAlive", None) + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) - if response.status_code == 402 and enable_proxies: - proxies_fallback = True - logger.warning( - "Proxies unavailable (402), retrying without proxies. " - "Bot detection may be less effective." 
- ) - session_config.pop("proxies", None) - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) + if response.status_code == 402 and enable_proxies: + proxies_fallback = True + logger.warning( + "Proxies unavailable (402), retrying without proxies. " + "Bot detection may be less effective." + ) + session_config.pop("proxies", None) + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) + except requests.RequestException as exc: + raise RuntimeError( + f"Browserbase API connection failed: {exc}" + ) from exc if not response.ok: raise RuntimeError( diff --git a/tools/browser_providers/firecrawl.py b/tools/browser_providers/firecrawl.py index 17001f72f1d..4a8ae82a2d2 100644 --- a/tools/browser_providers/firecrawl.py +++ b/tools/browser_providers/firecrawl.py @@ -47,12 +47,17 @@ class FirecrawlProvider(CloudBrowserProvider): body: Dict[str, object] = {"ttl": ttl} - response = requests.post( - f"{self._api_url()}/v2/browser", - headers=self._headers(), - json=body, - timeout=30, - ) + try: + response = requests.post( + f"{self._api_url()}/v2/browser", + headers=self._headers(), + json=body, + timeout=30, + ) + except requests.RequestException as exc: + raise RuntimeError( + f"Firecrawl API connection failed: {exc}" + ) from exc if not response.ok: raise RuntimeError( From 274217316e65bd7d4030b105548de30747526ec9 Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Thu, 14 May 2026 13:19:59 -0400 Subject: [PATCH 037/218] fix(codex-runtime): keep migrated root keys top-level --- hermes_cli/codex_runtime_plugin_migration.py | 40 +++++++++++++++---- .../test_codex_runtime_plugin_migration.py | 23 ++++++++++- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py index dd7faa09794..49b4905d5b2 100644 --- 
a/hermes_cli/codex_runtime_plugin_migration.py +++ b/hermes_cli/codex_runtime_plugin_migration.py @@ -304,6 +304,37 @@ def render_codex_toml_section( return "\n".join(out) + "\n" +def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> str: + """Insert Hermes' managed Codex TOML block while keeping root keys root-scoped. + + TOML has no syntax to return to the document root after a table header. + Therefore appending a root key like `default_permissions = ...` after a + user table such as `[features]` actually creates `features.default_permissions`, + which Codex rejects. Insert the managed block before the first table header + so its root keys remain top-level, while preserving user content verbatim. + """ + if not user_text.strip(): + return managed_block + + lines = user_text.splitlines(keepends=True) + first_table_idx: Optional[int] = None + for idx, line in enumerate(lines): + stripped = line.lstrip() + if stripped.startswith("["): + first_table_idx = idx + break + + if first_table_idx is None: + prefix = user_text.rstrip("\n") + return f"{prefix}\n\n{managed_block}" if prefix else managed_block + + prefix = "".join(lines[:first_table_idx]).rstrip("\n") + suffix = "".join(lines[first_table_idx:]).lstrip("\n") + if prefix: + return f"{prefix}\n\n{managed_block}\n{suffix}" + return f"{managed_block}\n{suffix}" + + def _strip_existing_managed_block(toml_text: str) -> str: """Remove any prior managed section so re-runs idempotently replace it. 
@@ -571,14 +602,7 @@ def migrate( report.errors.append(f"could not read {target}: {exc}") return report without_managed = _strip_existing_managed_block(existing) - # Ensure exactly one blank line between user content and managed block - if without_managed and not without_managed.endswith("\n"): - without_managed += "\n" - new_text = ( - without_managed.rstrip("\n") + "\n\n" + managed_block - if without_managed.strip() - else managed_block - ) + new_text = _insert_managed_block_at_top_level(without_managed, managed_block) else: new_text = managed_block diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py index b2e27f8c97b..c283a668681 100644 --- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py +++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py @@ -567,10 +567,31 @@ class TestMigrate: assert "[model]" in new_text assert 'profile = "default"' in new_text assert "[providers.openai]" in new_text - # And new MCP block appended + # And new MCP block inserted without breaking user tables assert "[mcp_servers.a]" in new_text assert MIGRATION_MARKER in new_text + def test_managed_root_keys_stay_top_level_when_config_ends_in_table(self, tmp_path): + """TOML has no explicit 'leave current table' syntax. If Hermes appends + root keys like default_permissions after a user table such as [features], + Codex parses them as features.default_permissions and rejects the config. 
+ The managed block must therefore be inserted before the first table.""" + import tomllib + + target = tmp_path / "config.toml" + target.write_text( + 'model = "gpt-5.5"\n' + "\n" + "[features]\n" + "terminal_resize_reflow = true\n" + ) + migrate({}, codex_home=tmp_path, discover_plugins=False, expose_hermes_tools=False) + new_text = target.read_text() + parsed = tomllib.loads(new_text) + assert parsed["default_permissions"] == ":workspace" + assert "default_permissions" not in parsed["features"] + assert new_text.index(MIGRATION_MARKER) < new_text.index("[features]") + def test_preserves_user_mcp_server_outside_managed_block(self, tmp_path): """Quirk #6: when a user adds their own MCP server entry directly to ~/.codex/config.toml outside Hermes' managed block, re-running From 77276070f5a1302908456734f2a5bdfe790260de Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 15 May 2026 14:45:31 +0530 Subject: [PATCH 038/218] fix(codex-runtime): de-dup [plugins.X] tables and stop leaking HERMES_HOME into config.toml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on @steezkelly's Bug A fix (#25857, top-level default_permissions via _insert_managed_block_at_top_level) by addressing the other two config-corruption bugs described in #26250: Bug B (duplicate [plugins.X] tables) - Codex itself writes [plugins."<name>@<marketplace>"] tables to config.toml when the user runs `codex plugins enable` directly, before hermes-agent's managed block exists. On the next migrate run, _query_codex_plugins() re-discovers the same plugins via plugin/list and render_codex_toml_section() re-emits them inside the managed block. Codex's strict TOML parser then rejects the duplicate table header on startup. - Add _strip_unmanaged_plugin_tables() that drops [plugins.*] tables from the user-content portion of the file.
Only run it when plugin/list succeeded — if the RPC failed we can't re-emit and must preserve the user's tables. plugin/list is the source of truth when it answers. Bug C (HERMES_HOME pytest-tempdir leak into ~/.codex/config.toml) - _build_hermes_tools_mcp_entry() read HERMES_HOME directly from os.environ, so a sibling pytest's monkeypatch.setenv("HERMES_HOME", tmp_path) silently burned a transient pytest tempdir into the user's real ~/.codex/config.toml. After pytest reaped the tempdir, every codex-routed hermes-tools tool call failed silently. - Derive HERMES_HOME from get_hermes_home() (the canonical resolver that goes through the profile-aware path) and refuse to emit obvious test-tempdir paths via _looks_like_test_tempdir() as belt-and-suspenders for any other callsite that forgets to patch migrate(). - test_enable_succeeds_when_codex_present in test_codex_runtime_switch.py invoked the real migrate() (no mock), writing to Path.home() / .codex using whatever HERMES_HOME the running pytest session had set. Add the same migrate patch the other apply() tests already use, so the suite stops touching the user's real ~/.codex/config.toml. E2E verification (replicating the issue's repro): - Pre-state config.toml with user [mcp_servers.omx_team_run] + codex-installed [plugins."tasks@openai-curated"], HERMES_HOME="/private/var/folders/.../pytest-of-.../..." - On origin/main: tomllib refuses to load the result with "Cannot declare ('plugins', 'tasks@openai-curated') twice" AND the pytest-tempdir HERMES_HOME is burned in. - On this branch: file parses cleanly, default_permissions is top-level, exactly one [plugins."tasks@openai-curated"] table inside the managed block, no HERMES_HOME in the MCP env. 7 new regression tests covering all three bugs + the test-leak guard. `bash scripts/run_tests.sh tests/hermes_cli/test_codex_runtime_*.py` — 95 passed, 0 failed. 
Closes #26250 --- hermes_cli/codex_runtime_plugin_migration.py | 125 ++++++++++- .../test_codex_runtime_plugin_migration.py | 207 ++++++++++++++++++ tests/hermes_cli/test_codex_runtime_switch.py | 9 +- 3 files changed, 337 insertions(+), 4 deletions(-) diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py index 49b4905d5b2..4b30d3ebf26 100644 --- a/hermes_cli/codex_runtime_plugin_migration.py +++ b/hermes_cli/codex_runtime_plugin_migration.py @@ -335,6 +335,72 @@ def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> st return f"{managed_block}\n{suffix}" +def _strip_unmanaged_plugin_tables(toml_text: str) -> str: + """Remove ``[plugins."<name>@<marketplace>"]`` tables that live OUTSIDE the + managed block. + + Codex itself writes these tables when the user runs ``codex plugins enable`` + directly (i.e. before Hermes' migrate has ever touched the file). When we + later run migrate, ``_query_codex_plugins()`` reports the same plugins via + the live ``plugin/list`` RPC and we re-emit them inside the managed block. + The result without this strip is duplicate ``[plugins."X@Y"]`` table + headers — codex's strict TOML parser then refuses to load the file. + + We own the ``[plugins.*]`` namespace once migrate has run, so dropping any + pre-existing ``[plugins.*]`` tables is safe: ``plugin/list`` is the source + of truth for what's actually installed. The caller is expected to only + invoke this strip when ``plugin/list`` succeeded — otherwise we'd lose + plugins the user installed via ``codex`` without a way to re-emit them. + + Behavior: + * Lines beginning with ``[plugins.`` start a swallow region that ends at + the next non-``[plugins.`` table header or end-of-file. + * Content inside the managed block is untouched (callers should run + ``_strip_existing_managed_block`` first so the managed block has + already been removed when this runs).
+ """ + lines = toml_text.splitlines(keepends=True) + out: list[str] = [] + in_plugin_table = False + for line in lines: + stripped = line.lstrip() + # Only treat a line as a table header when it has the shape + # ``[...]`` (optionally followed by a comment). Multi-line array + # continuations like ``["nested"],`` also start with ``[`` after + # lstrip but are not headers — without this guard they would + # falsely flip ``in_plugin_table`` to False mid-table and leak + # array fragments into the output. + if _looks_like_table_header(stripped): + in_plugin_table = stripped.startswith("[plugins.") + if in_plugin_table: + continue + if in_plugin_table: + # Swallow keys/comments/blanks until the next table header. + continue + out.append(line) + return "".join(out) + + +def _looks_like_table_header(stripped_line: str) -> bool: + """Return True if ``stripped_line`` is a TOML table header. + + A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables), + optionally followed by a comment. The closing ``]`` (or ``]]``) must + appear on the same line, and no key-assignment ``=`` can precede it. + This distinguishes real headers from multi-line array continuation + lines that also start with ``[`` after ``lstrip()``. + """ + if not stripped_line.startswith("["): + return False + # Drop trailing comment so e.g. ``[features] # note`` still matches. + head = stripped_line.split("#", 1)[0].rstrip() + if not head.endswith("]"): + return False + # ``key = [x]`` would have an ``=`` before the bracket; a header doesn't. + bracket_idx = head.index("]") + return "=" not in head[: bracket_idx + 1] + + def _strip_existing_managed_block(toml_text: str) -> str: """Remove any prior managed section so re-runs idempotently replace it. @@ -462,6 +528,32 @@ def _query_codex_plugins( return out, None +def _looks_like_test_tempdir(path: str) -> bool: + """Heuristic: does ``path`` look like a pytest/transient tempdir? 
+ + pytest tempdirs live under ``pytest-of-<user>/pytest-<N>/`` (created via + ``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions. + macOS routes ``/tmp`` through ``/private/var/folders/<…>/T`` which is + what pytest's tempdir factory uses by default. If a HERMES_HOME pointing + at one of those paths is burned into ``~/.codex/config.toml``, every + codex-routed hermes-tools call fails silently once the directory is GC'd. + + We err on the side of refusing — losing a (very unlikely) real + ``~/.hermes`` symlink that happens to live under ``/private/var/folders`` + is much less harmful than silently bricking codex's tool surface. + """ + if not path: + return False + needles = ( + "pytest-of-", + "/pytest-", + "/tmp/pytest", + "/private/var/folders/", # macOS tempdir root + ) + normalized = path.lower() + return any(needle in normalized for needle in needles) + + def _build_hermes_tools_mcp_entry() -> dict: """Build the codex stdio-transport entry that launches Hermes' own tool surface as an MCP server. Codex's subprocess will call back into @@ -474,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict: import sys env: dict[str, str] = {} - # HERMES_HOME passes through if set so the MCP subprocess sees the - # same config / auth / sessions DB as the parent CLI. - hermes_home = os.environ.get("HERMES_HOME") + # HERMES_HOME passes through IF SET so the MCP subprocess sees the same + # config / auth / sessions DB as the parent CLI. Read from os.environ + # (not get_hermes_home()) on purpose: when the env var is unset we want + # codex's subprocess to inherit whatever HERMES_HOME its launcher sets + # at runtime (systemd unit, gateway, kanban dispatcher, custom shell), + # rather than burning the migrate-time resolved default into config.toml + # — that would override the launcher's HERMES_HOME and pin the subprocess + # to the wrong profile.
+ # + # The pytest-tempdir guard below catches the issue #26250 Bug C scenario: + # a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would + # otherwise leak a transient pytest tempdir into the user's real + # ~/.codex/config.toml and silently brick codex once the tempdir is GC'd. + hermes_home = os.environ.get("HERMES_HOME") or "" + if hermes_home and _looks_like_test_tempdir(hermes_home): + hermes_home = "" if hermes_home: env["HERMES_HOME"] = hermes_home # PYTHONPATH passes through so a worktree-launched hermes finds the @@ -564,10 +669,16 @@ def migrate( # Discover installed Codex curated plugins. Best-effort — never blocks # the migration if codex is unreachable or the RPC fails. plugins: list[dict] = [] + plugin_query_succeeded = False if discover_plugins and not dry_run: plugins, plugin_err = _query_codex_plugins(codex_home=codex_home) if plugin_err: report.plugin_query_error = plugin_err + else: + # plugin/list returned authoritatively (even if the list is empty). + # That means we own [plugins.*] for this re-render and can safely + # strip any pre-existing tables outside the managed block. + plugin_query_succeeded = True for p in plugins: report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}") @@ -602,6 +713,14 @@ def migrate( report.errors.append(f"could not read {target}: {exc}") return report without_managed = _strip_existing_managed_block(existing) + # Bug B: when plugin/list ran authoritatively, codex's own + # [plugins."<name>@<marketplace>"] tables outside our managed block + # would survive _strip_existing_managed_block and then collide with + # the entries we re-emit inside the managed block — producing + # duplicate-table-header parse errors on codex's next startup. Drop + # those pre-existing tables since plugin/list is the source of truth.
+ if plugin_query_succeeded: + without_managed = _strip_unmanaged_plugin_tables(without_managed) new_text = _insert_managed_block_at_top_level(without_managed, managed_block) else: new_text = managed_block diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py index c283a668681..ebdc9f9ae6b 100644 --- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py +++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py @@ -8,9 +8,13 @@ import pytest from hermes_cli.codex_runtime_plugin_migration import ( MIGRATION_MARKER, + MIGRATION_END_MARKER, MigrationReport, + _build_hermes_tools_mcp_entry, _format_toml_value, + _looks_like_test_tempdir, _strip_existing_managed_block, + _strip_unmanaged_plugin_tables, _translate_one_server, migrate, render_codex_toml_section, @@ -656,3 +660,206 @@ class TestMigrate: assert "Migrated 2 MCP server(s)" in summary assert "- a" in summary assert "- b" in summary + + +# ---- Bug B: duplicate [plugins.X] tables ---- + + +class TestStripUnmanagedPluginTables: + """Regression tests for issue #26250 Bug B. + + When codex itself writes ``[plugins."<name>@<marketplace>"]`` tables + (via the user running ``codex plugins enable`` directly), re-running + ``hermes codex-runtime migrate`` would re-emit them inside the managed + block and the resulting duplicate-table-header would crash codex. + """ + + def test_strips_plugin_tables_outside_managed_block(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.user-thing]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + "\n" + '[plugins."web-search@openai-curated"]\n' + "enabled = true\n" + "\n" + "[features]\n" + "terminal_resize_reflow = true\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + assert "[plugins."
not in stripped + # Non-plugin content preserved + assert "[mcp_servers.user-thing]" in stripped + assert "[features]" in stripped + assert "terminal_resize_reflow = true" in stripped + + def test_preserves_content_when_no_plugin_tables(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.x]\n" + 'command = "y"\n' + ) + assert _strip_unmanaged_plugin_tables(text) == text + + def test_multi_line_array_in_plugin_table_does_not_leak(self): + """A multi-line TOML array inside a [plugins.X] table whose + continuation lines start with ``[`` (e.g. nested arrays) must NOT + prematurely exit the strip region — otherwise array fragments + leak into top-level output and produce invalid TOML on the next + codex startup. Regression guard for #26260 review. + """ + text = ( + '[plugins."tasks@openai-curated"]\n' + "allowed = [\n" + ' "a",\n' + ' ["nested"],\n' + "]\n" + "[features]\n" + "x = 1\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + # Everything inside the plugin table — including the multi-line + # array's continuation lines starting with `[` — should be gone. + assert '["nested"]' not in stripped + assert "allowed" not in stripped + # Sibling user table survives intact. + assert "[features]" in stripped + assert "x = 1" in stripped + # Result is still valid TOML. + import tomllib + tomllib.loads(stripped) + + def test_migrate_dedups_codex_owned_plugin_tables(self, tmp_path, monkeypatch): + """End-to-end: codex's pre-existing [plugins.X] tables get replaced by + the managed block's re-emission rather than duplicated.""" + target = tmp_path / "config.toml" + target.write_text( + "[mcp_servers.user-server]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + # Simulate codex's plugin/list reporting the same plugin tasks@openai-curated. 
+ def fake_query(codex_home=None, timeout=8.0): + return ( + [{"name": "tasks", "marketplace": "openai-curated", "enabled": True}], + None, + ) + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # Only ONE [plugins."tasks@openai-curated"] header should remain — inside + # the managed block — not the original outside-the-block copy. + assert new_text.count('[plugins."tasks@openai-curated"]') == 1 + # And the surviving one is inside our managed section. + managed_start = new_text.index(MIGRATION_MARKER) + managed_end = new_text.index(MIGRATION_END_MARKER) + plugin_idx = new_text.index('[plugins."tasks@openai-curated"]') + assert managed_start < plugin_idx < managed_end + # File parses cleanly as TOML (the original duplicate-key error is gone). + import tomllib + tomllib.loads(new_text) + + def test_migrate_preserves_plugin_tables_when_plugin_list_fails(self, tmp_path, monkeypatch): + """If plugin/list RPC fails, we can't re-emit plugins authoritatively, + so we must NOT strip the user's existing [plugins.X] tables — that + would silently lose them.""" + target = tmp_path / "config.toml" + target.write_text( + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + def fake_query(codex_home=None, timeout=8.0): + return ([], "plugin/list query failed: codex not installed") + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # User's plugin table preserved verbatim — we can't re-emit it. + assert '[plugins."tasks@openai-curated"]' in new_text + + +# ---- Bug C: HERMES_HOME tempdir leak into ~/.codex/config.toml ---- + + +class TestHermesHomeLeakGuard: + """Regression tests for issue #26250 Bug C. 
+ + Previously ``_build_hermes_tools_mcp_entry()`` read ``HERMES_HOME`` + directly from ``os.environ``, so a pytest ``monkeypatch.setenv`` would + leak a transient tempdir path into the user's real ``~/.codex/config.toml`` + once codex spawned the hermes-tools MCP subprocess. + """ + + def test_tempdir_detector_recognizes_pytest_paths(self): + assert _looks_like_test_tempdir( + "/private/var/folders/abc/pytest-of-kshitij/pytest-137/popen-gw2/test_X/hermes_test" + ) + assert _looks_like_test_tempdir( + "/tmp/pytest-of-user/pytest-12/test_X/hermes" + ) + assert _looks_like_test_tempdir( + "/private/var/folders/zz/T/pytest-of-bob/pytest-1" + ) + + def test_tempdir_detector_accepts_real_hermes_home(self): + assert not _looks_like_test_tempdir("/Users/alice/.hermes") + assert not _looks_like_test_tempdir("/home/bob/.hermes") + assert not _looks_like_test_tempdir("/opt/hermes") + assert not _looks_like_test_tempdir("") + + def test_pytest_tempdir_not_burned_into_mcp_env(self, monkeypatch): + """The headline regression: even when HERMES_HOME points at a pytest + tempdir, _build_hermes_tools_mcp_entry() must NOT propagate it.""" + monkeypatch.setenv( + "HERMES_HOME", + "/private/var/folders/xx/pytest-of-user/pytest-99/test_x/hermes_test", + ) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"pytest-tempdir HERMES_HOME leaked into codex MCP entry: " + f"{env.get('HERMES_HOME')!r}" + ) + + def test_real_hermes_home_propagates(self, monkeypatch, tmp_path): + """A legitimate HERMES_HOME (not a tempdir path) DOES propagate so the + MCP subprocess sees the same config as the parent CLI.""" + # Use a path that looks real — under /Users or /home, not /var/folders. + # We can't easily create one in the test, so just use a stable path + # outside any tempdir-detector needle. The detector checks for tempdir + # markers, not for path existence. 
+ real_path = "/Users/alice/.hermes" + monkeypatch.setenv("HERMES_HOME", real_path) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert env.get("HERMES_HOME") == real_path + + def test_unset_hermes_home_omits_env_key(self, monkeypatch): + """When HERMES_HOME is unset in the environment, the MCP entry MUST + NOT bake in a resolved-default path. The codex subprocess should + inherit whatever HERMES_HOME its launcher (systemd, gateway, shell) + sets at runtime, rather than being pinned to migrate-time defaults. + Regression guard for issue #26250 follow-up review.""" + monkeypatch.delenv("HERMES_HOME", raising=False) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"HERMES_HOME should not be set when env var is unset, got: " + f"{env.get('HERMES_HOME')!r}" + ) diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py index 9a01543776e..7bf1a59e1e7 100644 --- a/tests/hermes_cli/test_codex_runtime_switch.py +++ b/tests/hermes_cli/test_codex_runtime_switch.py @@ -114,8 +114,15 @@ class TestApply: def persist(c): persisted.update(c) + # Patch migrate so this test doesn't reach into the user's real + # ~/.codex/config.toml. See issue #26250 Bug C — without this patch, + # crs.apply() invokes the real migrate() which writes to + # Path.home() / ".codex" using whatever HERMES_HOME the running pytest + # session has set, leaking pytest tempdir paths into the user's + # codex config. 
+ with patch.object(crs, "check_codex_binary_ok", - return_value=(True, "0.130.0")): + return_value=(True, "0.130.0")), \ + patch("hermes_cli.codex_runtime_plugin_migration.migrate"): r = crs.apply(cfg, "codex_app_server", persist_callback=persist) assert r.success assert r.new_value == "codex_app_server" From f199cd9f84d8e59f0e50ce8d99aa9ac8adcc571a Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 15 May 2026 05:03:43 -0700 Subject: [PATCH 039/218] chore(release): map brian@dralth.com to btorresgil for #22345 salvage (#26319) PR #22345 by @btorresgil authors commits as 'Brian Conklin <brian@dralth.com>' (git config carries a different name/email than the GitHub account). GitHub's commit-author mapping correctly attributes these commits to @btorresgil based on the public-key registration, but Hermes' release attribution audit reads the raw commit email, not the GitHub mapping. Without this AUTHOR_MAP entry, salvaging #22345 would fail `scripts/contributor_audit.py` strict mode at release time. Prerequisite for the langfuse trace fix salvage that cherry-picks @btorresgil's commits onto current main.
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 8a6f30802be..f3df43c3fe1 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -265,6 +265,7 @@ AUTHOR_MAP = { "yuxiangl490@gmail.com": "y0shua1ee", "manmit0x@gmail.com": "0xDevNinja", "stevekelly622@gmail.com": "steezkelly", + "brian@dralth.com": "btorresgil", "momowind@gmail.com": "momowind", "clockwork-codex@users.noreply.github.com": "misery-hl", "207811921+misery-hl@users.noreply.github.com": "misery-hl", From db84a78e618bf973ffc403ed2e1f8162f2591daa Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 15 May 2026 05:04:02 -0700 Subject: [PATCH 040/218] =?UTF-8?q?fix(langfuse):=20complete=20observabili?= =?UTF-8?q?ty=20fix=20=E2=80=94=20trace=20I/O,=20tool=20outputs,=20placeho?= =?UTF-8?q?lder=20credentials=20(closes=20#22342,=20#22763)=20(#26320)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(langfuse): reject placeholder credentials with one-shot warning When operators leave HERMES_LANGFUSE_PUBLIC_KEY / HERMES_LANGFUSE_SECRET_KEY at a template value like 'placeholder', 'test-key', or 'your-langfuse-key', the Langfuse SDK silently accepts the credentials at construction time and drops every trace at flush time. No warning, no error — just an empty Langfuse dashboard the operator only notices hours later. Add prefix-based validation in _get_langfuse() against the documented 'pk-lf-' / 'sk-lf-' prefixes that Langfuse always issues server-side. 
Anything else fires a single warning naming the offending env var(s) with a log-safe value preview (full string for short placeholders so the operator knows which template they left in place; truncated for long values so a real secret pasted into the wrong field never hits the log), then short-circuits via the existing _INIT_FAILED cache so the warning fires once per process, not once per hook invocation. The check sits after the 'Langfuse is None' SDK-installed guard so hosts without the optional langfuse SDK don't see misleading 'set real keys' hints when the actionable fix is 'pip install langfuse'. Missing credentials remains the documented opt-out path and stays silent — no log noise for unconfigured installs. Fixes #22763 Fixes #23823 * fix(langfuse): use actual API request messages for generation input on_pre_llm_request previously used the messages kwarg alone, which could be None when Hermes passes the payload via request_messages, conversation_history, or user_message instead. Add _coerce_request_messages to pick the first available list across all variants, falling back to a synthetic user message. Generations now show the real outbound payload rather than an empty input. * fix(langfuse): record tool call outputs in traces Tool observations showed input (arguments) but output was always undefined. Root cause: when tool_call_id is empty, pre_tool_call stored observations under a unique time-based key that post_tool_call could never reconstruct, so every tool span was closed without output by the _finish_trace sweep. Fix pre/post matching by routing empty-tool_call_id tools through a per-name FIFO queue (pending_tools_by_name) instead of the time-based key. Tools with a tool_call_id continue to use the id-keyed dict. 
Also: - Preserve OpenAI-style nested function shape in serialized tool calls so Langfuse renders name/arguments correctly - Keep name + tool_call_id on role:tool messages for proper pairing - Backfill tool results onto the matching turn_tool_calls entry so the generation's tool-call record carries the result alongside arguments - Coerce request messages from whichever field the runtime provides (request_messages, messages, conversation_history, user_message) * fix(langfuse): salvage-review polish — drop dead is_first_turn, shallow-copy request_messages, real threaded FIFO test Self-review of the combined #22345 + #23831 salvage surfaced three issues worth fixing in the same PR rather than as follow-ups: 1. Drop is_first_turn from the pre_api_request hook. The boolean expression `not bool(conversation_history)` was wrong: conversation_history is reassigned to None mid-run after compression (5 sites in run_agent.py), so the value flips False -> True mid-conversation on every post-compression API call. The langfuse plugin never consumed it, so the kwarg was both misleading AND dead. 2. Replace copy.deepcopy(request_messages) with shallow list() copy. The pre_api_request hook contract discards return values (invoke_hook never writes back to api_kwargs), and the langfuse plugin's _serialize_messages already builds its own snapshot dicts via _safe_value. A deepcopy on every API call would walk every tool result and base64 image — significant overhead for no real isolation benefit. Shallow copy of the outer list protects against later mutations of api_messages without paying for the inner-dict walk. 3. Rename test_empty_tool_call_id_concurrent_fifo_order -> test_empty_tool_call_id_observations_are_fifo_within_tool_name and add a real test_threaded_post_calls_preserve_fifo_under_lock that spawns 8 threads behind a barrier to actually exercise _STATE_LOCK on the pending_tools_by_name queue. 
The original test was sequential and only validated Python list semantics; this one validates the lock discipline. 4. Fix stale 'Cleared by reset_cache_for_tests()' comment on _INIT_FAILED — that function does not exist. Tests reload the module via sys.modules.pop + importlib.import_module instead. Tests: 37 langfuse plugin tests pass, 658 plugin tests overall pass. --------- Co-authored-by: xxxigm Co-authored-by: Brian Conklin --- plugins/observability/langfuse/__init__.py | 168 ++++++- run_agent.py | 16 + tests/plugins/test_langfuse_plugin.py | 538 ++++++++++++++++++++- tests/run_agent/test_run_agent.py | 5 +- 4 files changed, 705 insertions(+), 22 deletions(-) diff --git a/plugins/observability/langfuse/__init__.py b/plugins/observability/langfuse/__init__.py index 9c9583261a6..8516030fb01 100644 --- a/plugins/observability/langfuse/__init__.py +++ b/plugins/observability/langfuse/__init__.py @@ -47,6 +47,7 @@ class TraceState: root_span: Any generations: Dict[str, Any] = field(default_factory=dict) tools: Dict[str, Any] = field(default_factory=dict) + pending_tools_by_name: Dict[str, list] = field(default_factory=dict) turn_tool_calls: list[dict[str, Any]] = field(default_factory=list) last_updated_at: float = field(default_factory=time.time) @@ -58,6 +59,17 @@ _READ_FILE_LINE_RE = re.compile(r"^\s*(\d+)\|(.*)$") _READ_FILE_HEAD_LINES = 25 _READ_FILE_TAIL_LINES = 15 +# Langfuse-issued keys always carry these prefixes (cloud or self-hosted — +# the prefix is baked into the server-side issuance flow, not a UI hint). +# Anything else (`placeholder`, `test-key`, `your-langfuse-key`, etc.) is a +# leftover template value and would cause the SDK to silently accept the +# credentials at construction time but drop every trace at flush time. +# See #23823 — the silent-failure bug this guard fixes. 
+_LANGFUSE_KEY_PREFIXES: Dict[str, str] = { + "HERMES_LANGFUSE_PUBLIC_KEY": "pk-lf-", + "HERMES_LANGFUSE_SECRET_KEY": "sk-lf-", +} + def _env(name: str, default: str = "") -> str: return os.environ.get(name, default).strip() @@ -82,10 +94,49 @@ def _debug(message: str) -> None: # Sentinel: "_get_langfuse() has tried and failed". Lets us short-circuit # every subsequent hook call without re-checking env vars or re-attempting -# SDK init. Cleared by reset_cache_for_tests(). +# SDK init. Tests clear this by reloading the module via +# ``sys.modules.pop(...) + importlib.import_module(...)`` rather than via a +# dedicated reset function. Runtime callers cannot reset the cache; if an +# operator fixes a misconfigured credential they must restart the process. _INIT_FAILED = object() +def _redact_key_preview(value: str) -> str: + """Return a brief, log-safe preview of a credential value. + + Keeps enough characters to disambiguate common placeholders + (``placeholder``, ``test-key``, ``your-key``) without echoing a + real secret in full if an operator pasted one into the wrong env + var. Used only for the once-per-process placeholder-detection + warning in :func:`_get_langfuse`. + """ + if not value: + return "" + if len(value) <= 12: + return repr(value) + return repr(value[:6] + "...") + + +def _validate_langfuse_key(env_name: str, value: str) -> Optional[str]: + """Return an error message if ``value`` is not a real Langfuse key. + + Returns ``None`` when the value matches the documented Langfuse + prefix for ``env_name``, or when no prefix is registered for the + name (in which case we trust the operator). When validation + fails the returned string is suitable for direct inclusion in a + single log line — it names the env var and shows a safe preview. 
+ """ + expected = _LANGFUSE_KEY_PREFIXES.get(env_name, "") + if not expected: + return None + if value.startswith(expected): + return None + return ( + f"{env_name}={_redact_key_preview(value)} " + f"(expected {expected!r} prefix)" + ) + + def _get_langfuse() -> Optional[Langfuse]: """Return a cached Langfuse client, or ``None`` if unavailable. @@ -111,6 +162,33 @@ def _get_langfuse() -> Optional[Langfuse]: _LANGFUSE_CLIENT = _INIT_FAILED return None + # Reject placeholder credentials with a one-shot warning so the + # operator sees the misconfiguration instead of silently shipping a + # broken observability stack (#23823). The SDK does not validate + # keys at construction time — it queues traces in memory and only + # discovers the auth failure when the background flush thread tries + # to post them, by which point the warning is buried under whatever + # else the process is logging. Catch it here, surface it once, and + # short-circuit via the same _INIT_FAILED path as the empty case. + placeholder_issues = [ + msg + for msg in ( + _validate_langfuse_key("HERMES_LANGFUSE_PUBLIC_KEY", public_key), + _validate_langfuse_key("HERMES_LANGFUSE_SECRET_KEY", secret_key), + ) + if msg + ] + if placeholder_issues: + logger.warning( + "Langfuse plugin: credentials look like placeholders, traces will " + "NOT be emitted (%s). Set real Langfuse keys (pk-lf-... / sk-lf-...) 
" + "or unset HERMES_LANGFUSE_PUBLIC_KEY / HERMES_LANGFUSE_SECRET_KEY to " + "silence this warning.", + "; ".join(placeholder_issues), + ) + _LANGFUSE_CLIENT = _INIT_FAILED + return None + base_url = _env("HERMES_LANGFUSE_BASE_URL") or _env("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com" environment = _env("HERMES_LANGFUSE_ENV") or _env("LANGFUSE_ENV") release = _env("HERMES_LANGFUSE_RELEASE") or _env("LANGFUSE_RELEASE") @@ -328,6 +406,21 @@ def _extract_last_user_message(messages: Any) -> Any: return None +def _coerce_request_messages( + *, + request_messages: Any = None, + messages: Any = None, + conversation_history: Any = None, + user_message: Any = None, +) -> list[dict[str, Any]]: + for candidate in (request_messages, messages, conversation_history): + if isinstance(candidate, list): + return candidate + if user_message is None: + return [] + return [{"role": "user", "content": user_message}] + + def _serialize_messages(messages: Any) -> list[dict[str, Any]]: if not isinstance(messages, list): return [] @@ -343,8 +436,11 @@ def _serialize_messages(messages: Any) -> list[dict[str, Any]]: parse_json_strings=(role == "tool"), ), } - if role == "tool" and message.get("tool_call_id"): - item["tool_call_id"] = message.get("tool_call_id") + if role == "tool": + if message.get("tool_call_id"): + item["tool_call_id"] = message.get("tool_call_id") + if message.get("name"): + item["name"] = _safe_value(message.get("name")) if message.get("tool_calls"): item["tool_calls"] = _safe_value(message.get("tool_calls"), parse_json_strings=True) serialized.append(item) @@ -359,15 +455,16 @@ def _serialize_tool_calls(tool_calls: Any) -> list[dict[str, Any]]: fn = getattr(tool_call, "function", None) name = getattr(fn, "name", None) if fn else None arguments = getattr(fn, "arguments", None) if fn else None - if isinstance(arguments, str): - try: - arguments = json.loads(arguments) - except Exception: - pass + safe_arguments = _safe_value(arguments, parse_json_strings=False) 
serialized.append({ "id": getattr(tool_call, "id", None), + "type": getattr(tool_call, "type", None) or "function", "name": name, - "arguments": _safe_value(arguments, parse_json_strings=True), + "arguments": safe_arguments, + "function": { + "name": name, + "arguments": safe_arguments, + }, }) return serialized @@ -564,6 +661,9 @@ def _finish_trace(task_key: str, *, output: Any = None) -> None: _end_observation(observation) for observation in state.tools.values(): _end_observation(observation) + for queue in state.pending_tools_by_name.values(): + for observation in queue: + _end_observation(observation) final_output = _merge_trace_output(output, state) if final_output is not None: state.root_span.set_trace_io(output=final_output) @@ -636,6 +736,7 @@ def on_pre_llm_request( base_url: str = "", api_mode: str = "", api_call_count: int = 0, + request_messages: Any = None, messages: Any = None, turn_type: str = "user", message_count: int = 0, @@ -643,12 +744,21 @@ def on_pre_llm_request( approx_input_tokens: int = 0, request_char_count: int = 0, max_tokens: Any = None, + conversation_history: Any = None, + user_message: Any = None, **_: Any, ) -> None: client = _get_langfuse() if client is None: return + input_messages = _coerce_request_messages( + request_messages=request_messages, + messages=messages, + conversation_history=conversation_history, + user_message=user_message, + ) + task_key = _trace_key(task_id, session_id) req_key = _request_key(api_call_count) @@ -663,7 +773,7 @@ def on_pre_llm_request( provider=provider, model=model, api_mode=api_mode, - messages=messages, + messages=input_messages, client=client, ) _TRACE_STATE[task_key] = state @@ -676,7 +786,7 @@ def on_pre_llm_request( client=client, name=f"LLM call {api_call_count}", as_type="generation", - input_value=_serialize_messages(messages), + input_value=_serialize_messages(input_messages), metadata={ "provider": provider, "platform": platform, @@ -815,13 +925,12 @@ def on_pre_tool_call(*, tool_name: 
str = "", args: Any = None, task_id: str = "" return task_key = _trace_key(task_id, session_id) - tool_key = tool_call_id or f"{tool_name}:{time.time_ns()}" with _STATE_LOCK: state = _TRACE_STATE.get(task_key) if state is None: return - state.tools[tool_key] = _start_child_observation( + observation = _start_child_observation( state, client=client, name=f"Tool: {tool_name}", @@ -829,22 +938,29 @@ def on_pre_tool_call(*, tool_name: str = "", args: Any = None, task_id: str = "" input_value=_safe_value(args), metadata={"tool_name": tool_name, "tool_call_id": tool_call_id}, ) + if tool_call_id: + state.tools[tool_call_id] = observation + else: + state.pending_tools_by_name.setdefault(tool_name, []).append(observation) def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = None, task_id: str = "", session_id: str = "", tool_call_id: str = "", **_: Any) -> None: task_key = _trace_key(task_id, session_id) - tool_key = tool_call_id or "" observation = None with _STATE_LOCK: state = _TRACE_STATE.get(task_key) if state is None: return - if tool_key: - observation = state.tools.pop(tool_key, None) - elif state.tools: - _, observation = state.tools.popitem() + if tool_call_id: + observation = state.tools.pop(tool_call_id, None) + if observation is None: + queue = state.pending_tools_by_name.get(tool_name) + if queue: + observation = queue.pop(0) + if not queue: + state.pending_tools_by_name.pop(tool_name, None) if observation is None: return @@ -854,10 +970,24 @@ def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = No else: result_value = result result_value = _normalize_payload(result_value, tool_name=tool_name, args=args) + safe_result_value = _safe_value(result_value, parse_json_strings=True) + + # Backfill so the generation's tool_call record carries the result alongside arguments. 
+ if tool_call_id: + with _STATE_LOCK: + state = _TRACE_STATE.get(task_key) + if state is not None: + for tool_call in reversed(state.turn_tool_calls): + if tool_call.get("id") == tool_call_id: + tool_call["output"] = safe_result_value + function_payload = tool_call.get("function") + if isinstance(function_payload, dict): + function_payload["output"] = safe_result_value + break _end_observation( observation, - output=_safe_value(result_value, parse_json_strings=True), + output=safe_result_value, metadata={"tool_name": tool_name, "args": _safe_value(args, parse_json_strings=True)}, ) diff --git a/run_agent.py b/run_agent.py index 18ca03bd512..a4df8749777 100644 --- a/run_agent.py +++ b/run_agent.py @@ -12668,16 +12668,30 @@ class AIAgent: try: from hermes_cli.plugins import invoke_hook as _invoke_hook + request_messages = api_kwargs.get("messages") + if not isinstance(request_messages, list): + request_messages = api_kwargs.get("input") + if not isinstance(request_messages, list): + request_messages = api_messages + # Shallow-copy the outer list so plugins that retain the + # reference for async snapshotting don't observe later + # mutations of api_messages. The inner dicts are not + # mutated by the agent loop, so a shallow copy is + # sufficient; a deepcopy would walk every tool result + # and base64 image on every API call. 
_invoke_hook( "pre_api_request", task_id=effective_task_id, session_id=self.session_id or "", + user_message=original_user_message, + conversation_history=list(messages), platform=self.platform or "", model=self.model, provider=self.provider, base_url=self.base_url, api_mode=self.api_mode, api_call_count=api_call_count, + request_messages=list(request_messages) if isinstance(request_messages, list) else [], message_count=len(api_messages), tool_count=len(self.tools or []), approx_input_tokens=approx_tokens, @@ -14582,7 +14596,9 @@ class AIAgent: finish_reason=finish_reason, message_count=len(api_messages), response_model=getattr(response, "model", None), + response=response, usage=self._usage_summary_for_api_request_hook(response), + assistant_message=assistant_message, assistant_content_chars=len(_assistant_text), assistant_tool_call_count=len(_assistant_tool_calls), ) diff --git a/tests/plugins/test_langfuse_plugin.py b/tests/plugins/test_langfuse_plugin.py index 6d9fcce38ee..313d2e94a72 100644 --- a/tests/plugins/test_langfuse_plugin.py +++ b/tests/plugins/test_langfuse_plugin.py @@ -2,6 +2,7 @@ from __future__ import annotations import importlib +import logging import sys from pathlib import Path @@ -164,7 +165,542 @@ class TestHooksInert: # Each hook should just return; no exceptions. mod.on_pre_llm_call(task_id="t", session_id="s", messages=[{"role": "user", "content": "hi"}]) - mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, messages=[]) + mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, request_messages=[]) mod.on_post_llm_call(task_id="t", session_id="s", api_call_count=1) mod.on_pre_tool_call(tool_name="read_file", args={}, task_id="t", session_id="s") mod.on_post_tool_call(tool_name="read_file", args={}, result="ok", task_id="t", session_id="s") + + +# --------------------------------------------------------------------------- +# Placeholder-credential guard (#23823). 
+# +# Regression coverage for the silent-failure bug: when an operator leaves +# HERMES_LANGFUSE_PUBLIC_KEY / SECRET_KEY at a template value like +# "placeholder", "test-key", or "your-langfuse-key", the SDK accepts the +# credentials at construction time (it does no server-side validation +# eagerly) but drops every trace at flush time, with no signal in the +# Hermes logs. The fix in `_get_langfuse()` validates the documented +# `pk-lf-` / `sk-lf-` prefix Langfuse always issues, surfaces a one-shot +# warning naming the offending env var(s), and short-circuits via the +# same `_INIT_FAILED` path used for missing credentials so subsequent +# hook invocations don't re-log. +# --------------------------------------------------------------------------- + + +class _FakeLangfuse: + """Stand-in for the real :class:`langfuse.Langfuse` so tests don't + need the optional ``langfuse`` SDK installed. The plugin's runtime + gate refuses to proceed past ``if Langfuse is None`` when the SDK + is missing, which would short-circuit before the placeholder check + can fire. Patching ``plugin.Langfuse`` with this class lets the + placeholder validator exercise its full code path.""" + + instances: list["_FakeLangfuse"] = [] + + def __init__(self, **kwargs): + self.kwargs = kwargs + _FakeLangfuse.instances.append(self) + + +class TestPlaceholderKeyDetection: + LOGGER_NAME = "plugins.observability.langfuse" + + def _fresh_plugin(self, monkeypatch=None): + mod_name = "plugins.observability.langfuse" + sys.modules.pop(mod_name, None) + mod = importlib.import_module(mod_name) + if monkeypatch is not None: + # Pretend the SDK is installed so `_get_langfuse()` actually + # reaches the placeholder check. Real SDK calls are never + # made because the placeholder/missing-credentials paths + # return before constructing a client. 
+ _FakeLangfuse.instances.clear() + monkeypatch.setattr(mod, "Langfuse", _FakeLangfuse, raising=False) + return mod + + @staticmethod + def _clear_env(monkeypatch): + for k in ( + "HERMES_LANGFUSE_PUBLIC_KEY", "HERMES_LANGFUSE_SECRET_KEY", + "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", + ): + monkeypatch.delenv(k, raising=False) + + # -- helper unit tests (no SDK stub needed: these don't go through + # _get_langfuse, they exercise the pure-Python helpers directly) ------ + + def test_redact_key_preview_empty(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._redact_key_preview("") == "" + + def test_redact_key_preview_short_value_echoed(self, monkeypatch): + """Short placeholder strings are echoed in full so the operator + can see exactly which template they forgot to replace.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._redact_key_preview("placeholder") == "'placeholder'" + assert plugin._redact_key_preview("test-key") == "'test-key'" + + def test_redact_key_preview_long_value_truncated(self, monkeypatch): + """If an operator pasted a real secret into the wrong env var the + preview must NOT echo it in full — only the leading 6 chars.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + result = plugin._redact_key_preview("sk-lf-abcdefghijklmnop") + assert "abcdefghij" not in result + assert result.startswith("'sk-lf-") + assert result.endswith("...'") + + def test_validate_langfuse_key_accepts_documented_prefix(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._validate_langfuse_key( + "HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz" + ) is None + assert plugin._validate_langfuse_key( + "HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz" + ) is None + + def test_validate_langfuse_key_rejects_wrong_prefix(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + msg = 
plugin._validate_langfuse_key( + "HERMES_LANGFUSE_PUBLIC_KEY", "placeholder" + ) + assert msg is not None + assert "HERMES_LANGFUSE_PUBLIC_KEY" in msg + assert "pk-lf-" in msg + + def test_validate_langfuse_key_unknown_name_passes(self, monkeypatch): + """Defensive: an env var with no registered prefix is trusted.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._validate_langfuse_key("HERMES_LANGFUSE_BASE_URL", "anything") is None + + # -- end-to-end _get_langfuse() behaviour -------------------------------- + # These tests pass `monkeypatch` to _fresh_plugin() so the helper can + # stub out `Langfuse` (the optional SDK). Without that, every call + # short-circuits at `if Langfuse is None` before reaching the + # placeholder validator — masking the very behaviour we're testing. + + def test_placeholder_public_key_warns_and_skips(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + text = caplog.text + assert "HERMES_LANGFUSE_PUBLIC_KEY" in text + assert "'placeholder'" in text + assert "pk-lf-" in text + # The valid secret value must NOT appear (the var NAME does, in + # the "or unset ..." hint, but the value preview shouldn't). + assert "'sk-lf-" not in text + # Never constructed the SDK client — short-circuited before that. 
+ assert _FakeLangfuse.instances == [] + + def test_placeholder_secret_key_warns_and_skips(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "test-key") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + text = caplog.text + assert "HERMES_LANGFUSE_SECRET_KEY" in text + assert "'test-key'" in text + assert "sk-lf-" in text + # The valid public value must NOT appear. + assert "'pk-lf-" not in text + assert _FakeLangfuse.instances == [] + + def test_both_placeholders_one_warning_with_both_keys(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert len(warnings) == 1, ( + f"Expected a single combined warning; got {len(warnings)}:\n" + + "\n".join(r.getMessage() for r in warnings) + ) + text = warnings[0].getMessage() + assert "HERMES_LANGFUSE_PUBLIC_KEY" in text + assert "HERMES_LANGFUSE_SECRET_KEY" in text + + def test_repeated_calls_do_not_re_warn(self, monkeypatch, caplog): + """The cached ``_INIT_FAILED`` sentinel must short-circuit + subsequent calls so each hook invocation isn't a fresh log + line — otherwise a busy gateway will spam the operator's + terminal.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + for _ in 
range(15): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert len(warnings) == 1, ( + f"Warning fired {len(warnings)} times across 15 calls; " + "expected 1 (cached via _INIT_FAILED)" + ) + + @pytest.mark.parametrize("placeholder", [ + "placeholder", + "test-key", + "your-langfuse-key", + "change-me", + "xxx", + "dummy-key-here", + "", + "REPLACE_ME", + ]) + def test_common_placeholders_detected(self, monkeypatch, caplog, placeholder): + """A grab-bag of values that real-world ``.env.example`` templates + use as stand-ins. Any of them in either key must trip the guard.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", placeholder) + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text + + def test_legacy_LANGFUSE_PUBLIC_KEY_also_validated(self, monkeypatch, caplog): + """The plugin reads both the canonical HERMES_-prefixed env var and + the legacy bare ``LANGFUSE_PUBLIC_KEY``. The validator must run on + whichever value ``_get_langfuse()`` actually consumed.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + # Warning names the canonical user-facing env var (the bare + # LANGFUSE_PUBLIC_KEY is a backwards-compat alias for the + # HERMES_-prefixed one — operators set the HERMES_-prefixed one). 
+ assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text + assert "'placeholder'" in caplog.text + + def test_missing_credentials_still_skip_silently(self, monkeypatch, caplog): + """Missing-creds is the documented opt-out path (operator hasn't + configured the plugin yet) — it must remain SILENT. Regression + guard against the placeholder validator accidentally running on + empty values and re-introducing log noise for unconfigured + installs.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert warnings == [] + + def test_sdk_not_installed_still_skips_silently(self, monkeypatch, caplog): + """If the langfuse SDK isn't installed at all, the placeholder + check should never run — there's nothing the operator can do + about a credential mismatch when the package is missing, and + re-warning here would dilute the actually-actionable SDK-missing + signal upstream. The ``Langfuse is None`` guard at the top of + ``_get_langfuse`` already handles this; this test pins that + behaviour.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + # NO monkeypatch on Langfuse here — falls back to whatever the + # plugin imported at module load (None if SDK absent). 
+ plugin = self._fresh_plugin() + monkeypatch.setattr(plugin, "Langfuse", None, raising=False) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert warnings == [] + + def test_valid_prefixes_do_not_trigger_placeholder_warning(self, monkeypatch, caplog): + """Real Langfuse keys (``pk-lf-…`` / ``sk-lf-…``) must pass the + guard and proceed to SDK init. We stub the SDK constructor with + a recording fake so the assertion can confirm BOTH that the + placeholder warning didn't fire AND that the client was actually + constructed — the latter is the success signal the bug report + wanted.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + client = plugin._get_langfuse() + assert isinstance(client, _FakeLangfuse) + assert client.kwargs["public_key"] == "pk-lf-real-public-xyz" + assert client.kwargs["secret_key"] == "sk-lf-real-secret-xyz" + assert "placeholders" not in caplog.text.lower(), ( + f"Valid Langfuse keys tripped the placeholder guard: {caplog.text!r}" + ) + + +class TestRequestMessageCoercion: + def test_prefers_request_messages_then_messages_then_history_then_user_message(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + assert mod._coerce_request_messages( + request_messages=[{"role": "system", "content": "s"}], + messages=[{"role": "user", "content": "m"}], + conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "system", "content": "s"}] + assert mod._coerce_request_messages( + messages=[{"role": "user", "content": "m"}], + 
conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "user", "content": "m"}] + assert mod._coerce_request_messages( + conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "user", "content": "h"}] + assert mod._coerce_request_messages(user_message="u") == [{"role": "user", "content": "u"}] + + +class TestToolCallOutputBackfill: + def test_post_tool_call_backfills_matching_turn_tool_call_output(self, monkeypatch): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + observation = object() + state = mod.TraceState(trace_id="trace-1", root_ctx=None, root_span=None) + state.tools["call-1"] = observation + state.turn_tool_calls.append({ + "id": "call-1", + "type": "function", + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + "function": { + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + }, + }) + + task_key = mod._trace_key("task-1", "session-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end_observation(obs, *, output=None, metadata=None, usage_details=None, cost_details=None): + ended["observation"] = obs + ended["output"] = output + ended["metadata"] = metadata + + monkeypatch.setattr(mod, "_end_observation", fake_end_observation) + + mod.on_post_tool_call( + tool_name="web_extract", + args={"urls": ["https://example.com"]}, + result='{"results": [{"url": "https://example.com", "content": "Example Domain"}]}', + task_id="task-1", + session_id="session-1", + tool_call_id="call-1", + ) + + assert ended["observation"] is observation + assert state.turn_tool_calls[0]["output"] == ended["output"] + assert state.turn_tool_calls[0]["function"]["output"] == ended["output"] + assert state.turn_tool_calls[0]["output"] == { + "results": [{"url": "https://example.com", "content": "Example Domain"}] + } + + def 
test_serialize_messages_keeps_tool_name_and_call_id(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + messages = [{ + "role": "tool", + "name": "web_extract", + "tool_call_id": "call-1", + "content": '{"ok": true}', + }] + + assert mod._serialize_messages(messages) == [{ + "role": "tool", + "name": "web_extract", + "tool_call_id": "call-1", + "content": {"ok": True}, + }] + + def test_serialize_tool_calls_emits_openai_style_function_shape(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + class _Fn: + name = "web_extract" + arguments = '{"urls": ["https://example.com"]}' + + class _ToolCall: + id = "call-1" + type = "function" + function = _Fn() + + assert mod._serialize_tool_calls([_ToolCall()]) == [{ + "id": "call-1", + "type": "function", + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + "function": { + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + }, + }] + + +class TestToolObservationKeying: + """Tests for pre/post tool_call observation matching when tool_call_id is absent.""" + + def _make_mod(self): + sys.modules.pop("plugins.observability.langfuse", None) + return importlib.import_module("plugins.observability.langfuse") + + def test_empty_tool_call_id_single_tool_sets_output(self, monkeypatch): + mod = self._make_mod() + obs = object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name.setdefault("my_tool", []).append(obs) + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end(o, *, output=None, metadata=None, **kw): + ended["obs"] = o + ended["output"] = output + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="my_tool", + args={}, + result='{"ok": 
true}', + task_id="task-1", + session_id="sess-1", + tool_call_id="", + ) + + assert ended["obs"] is obs + assert ended["output"] == {"ok": True} + assert state.pending_tools_by_name.get("my_tool") is None + + def test_empty_tool_call_id_observations_are_fifo_within_tool_name(self, monkeypatch): + """Two queued observations are consumed in FIFO order so the first + post hook gets the first observation's output, not the second. + + Sequential-on-one-thread coverage; the real concurrent case is + guarded by ``_STATE_LOCK`` around every read-modify-write on + ``pending_tools_by_name`` and is exercised in + ``test_threaded_post_calls_preserve_fifo_under_lock`` below. + """ + mod = self._make_mod() + obs_a, obs_b = object(), object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name["web_extract"] = [obs_a, obs_b] + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + calls = [] + + def fake_end(o, *, output=None, metadata=None, **kw): + calls.append((o, output)) + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"val": "a"}', + task_id="task-1", session_id="sess-1", tool_call_id="", + ) + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"val": "b"}', + task_id="task-1", session_id="sess-1", tool_call_id="", + ) + + assert calls[0] == (obs_a, {"val": "a"}) + assert calls[1] == (obs_b, {"val": "b"}) + assert state.pending_tools_by_name.get("web_extract") is None + + def test_threaded_post_calls_preserve_fifo_under_lock(self, monkeypatch): + """The actual concurrency contract: when 8 threads race to drain + the pending queue, no observation is consumed twice and none is + lost. 
Validates ``_STATE_LOCK`` discipline, not Python list + semantics.""" + import threading + + mod = self._make_mod() + n = 8 + observations = [object() for _ in range(n)] + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name["web_extract"] = list(observations) + + task_key = mod._trace_key("task-thr", "sess-thr") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + recorded: list = [] + lock = threading.Lock() + + def fake_end(o, *, output=None, metadata=None, **kw): + with lock: + recorded.append(o) + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + barrier = threading.Barrier(n) + + def worker(): + barrier.wait() + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"ok": true}', + task_id="task-thr", session_id="sess-thr", tool_call_id="", + ) + + threads = [threading.Thread(target=worker) for _ in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + + # Every observation was consumed exactly once; queue is empty. 
+ assert len(recorded) == n + assert set(map(id, recorded)) == set(map(id, observations)) + assert state.pending_tools_by_name.get("web_extract") is None + + def test_explicit_tool_call_id_uses_tools_dict(self, monkeypatch): + """When tool_call_id is present, pending_tools_by_name is not touched.""" + mod = self._make_mod() + obs = object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.tools["call-99"] = obs + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end(o, *, output=None, metadata=None, **kw): + ended["obs"] = o + ended["output"] = output + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="my_tool", args={}, result='{"status": "done"}', + task_id="task-1", session_id="sess-1", tool_call_id="call-99", + ) + + assert ended["obs"] is obs + assert ended["output"] == {"status": "done"} + assert not state.tools + diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index dadb7b31cce..c493f91509a 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2524,8 +2524,9 @@ class TestRunConversation: assert [call["api_call_count"] for call in pre_request_calls] == [1, 2] assert [call["api_call_count"] for call in post_request_calls] == [1, 2] assert all(call["session_id"] == agent.session_id for call in pre_request_calls) - assert all("message_count" in c and "messages" not in c for c in pre_request_calls) - assert all("usage" in c and "response" not in c for c in post_request_calls) + assert all("message_count" in c and isinstance(c.get("request_messages"), list) for c in pre_request_calls) + assert any(msg.get("role") == "user" and msg.get("content") == "search something" for msg in pre_request_calls[0]["request_messages"]) + assert all("usage" in c and "response" in c and "assistant_message" in c for c in post_request_calls) def 
test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent): self._setup_agent(agent) From d5416284f11ccbc735c8357f0ab35ce5f683ccc3 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Fri, 15 May 2026 19:31:00 +0530 Subject: [PATCH 041/218] fix(tui): autonomous background process completion notifications (#26071) (#26327) * feat(process-registry): add format_process_notification shared helper * feat(process-registry): add drain_notifications method * refactor(cli): use shared drain_notifications and format_process_notification * feat(tui): add background notification poller for completion_queue * feat(tui): wire notification poller into session init/finalize * refactor(tui): add post-turn drain using shared helper as safety net --- cli.py | 59 +--------- tests/test_tui_gateway_server.py | 155 +++++++++++++++++++++++++++ tests/tools/test_process_registry.py | 135 +++++++++++++++++++++++ tools/process_registry.py | 58 ++++++++++ tui_gateway/server.py | 134 +++++++++++++++++++++++ 5 files changed, 486 insertions(+), 55 deletions(-) diff --git a/cli.py b/cli.py index 27286a3c988..50e7a8c8ce9 100644 --- a/cli.py +++ b/cli.py @@ -1965,43 +1965,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None: return resolved -def _format_process_notification(evt: dict) -> "str | None": - """Format a process notification event into a [IMPORTANT: ...] message. - Handles both completion events (notify_on_complete) and watch pattern - match events from the unified completion_queue. 
- """ - evt_type = evt.get("type", "completion") - _sid = evt.get("session_id", "unknown") - _cmd = evt.get("command", "unknown") - - if evt_type == "watch_disabled": - return f"[IMPORTANT: {evt.get('message', '')}]" - - if evt_type == "watch_match": - _pat = evt.get("pattern", "?") - _out = evt.get("output", "") - _sup = evt.get("suppressed", 0) - text = ( - f"[IMPORTANT: Background process {_sid} matched " - f"watch pattern \"{_pat}\".\n" - f"Command: {_cmd}\n" - f"Matched output:\n{_out}" - ) - if _sup: - text += f"\n({_sup} earlier matches were suppressed by rate limit)" - text += "]" - return text - - # Default: completion event - _exit = evt.get("exit_code", "?") - _out = evt.get("output", "") - return ( - f"[IMPORTANT: Background process {_sid} completed " - f"(exit code {_exit}).\n" - f"Command: {_cmd}\n" - f"Output:\n{_out}]" - ) def _detect_file_drop(user_input: str) -> "dict | None": @@ -13518,16 +13482,8 @@ class HermesCLI: # and watch pattern matches) while agent is idle. try: from tools.process_registry import process_registry - if not process_registry.completion_queue.empty(): - evt = process_registry.completion_queue.get_nowait() - # Skip if the agent already consumed this via wait/poll/log - _evt_sid = evt.get("session_id", "") - if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): - pass # already delivered via tool result - else: - _synth = _format_process_notification(evt) - if _synth: - self._pending_input.put(_synth) + for _evt, _synth in process_registry.drain_notifications(): + self._pending_input.put(_synth) except Exception: pass continue @@ -13635,15 +13591,8 @@ class HermesCLI: # that arrived while the agent was running. 
try: from tools.process_registry import process_registry - while not process_registry.completion_queue.empty(): - evt = process_registry.completion_queue.get_nowait() - # Skip if the agent already consumed this via wait/poll/log - _evt_sid = evt.get("session_id", "") - if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): - continue # already delivered via tool result - _synth = _format_process_notification(evt) - if _synth: - self._pending_input.put(_synth) + for _evt, _synth in process_registry.drain_notifications(): + self._pending_input.put(_synth) except Exception: pass # Non-fatal — don't break the main loop diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 64a154bb9a7..0d5bad8e875 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -4649,3 +4649,158 @@ def test_config_show_displays_nested_max_turns(monkeypatch): ) assert ["Max Turns", "120"] in agent_rows + + +def test_notification_poller_delivers_completion(monkeypatch): + """Poller picks up completion events and triggers agent turns.""" + from tools.process_registry import process_registry + + turns = [] + emitted = [] + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + turns.append(prompt) + return { + "final_response": "ok", + "messages": [{"role": "assistant", "content": "ok"}], + } + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + def start(self): + self._target() + + sess = _session(agent=_Agent()) + server._sessions["sid_poll"] = sess + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a)) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + + # Clear queue + while not process_registry.completion_queue.empty(): 
+ process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_poller_test") + + stop = threading.Event() + + # Put event on queue, then immediately signal stop so the poller + # runs exactly one iteration. + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_poller_test", + "command": "echo hello", + "exit_code": 0, + "output": "hello", + }) + stop.set() + + try: + server._notification_poller_loop(stop, "sid_poll", sess) + + # Should have emitted a status.update with kind=process + status_calls = [a for a in emitted if a[0] == "status.update"] + assert len(status_calls) >= 1 + assert status_calls[0][2]["kind"] == "process" + + # Should have triggered an agent turn + assert len(turns) == 1 + assert "[IMPORTANT: Background process proc_poller_test completed" in turns[0] + finally: + server._sessions.pop("sid_poll", None) + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def test_notification_poller_skips_consumed(monkeypatch): + """Already-consumed completions are not dispatched by the poller.""" + from tools.process_registry import process_registry + + turns = [] + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + turns.append(prompt) + return {"final_response": "ok", "messages": []} + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + def start(self): + self._target() + + sess = _session(agent=_Agent()) + server._sessions["sid_skip"] = sess + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: None) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + 
process_registry._completion_consumed.add("proc_already_done") + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_already_done", + "command": "echo x", + "exit_code": 0, + "output": "x", + }) + + stop = threading.Event() + stop.set() + + try: + server._notification_poller_loop(stop, "sid_skip", sess) + assert len(turns) == 0 + finally: + server._sessions.pop("sid_skip", None) + process_registry._completion_consumed.discard("proc_already_done") + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def test_notification_poller_requeues_when_busy(monkeypatch): + """When the agent is busy, the poller requeues the event.""" + from tools.process_registry import process_registry + + emitted = [] + + sess = _session(running=True) # agent is busy + server._sessions["sid_busy"] = sess + monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a)) + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_busy_test") + + evt = { + "type": "completion", + "session_id": "proc_busy_test", + "command": "make build", + "exit_code": 0, + "output": "ok", + } + process_registry.completion_queue.put(evt) + + stop = threading.Event() + stop.set() + + try: + server._notification_poller_loop(stop, "sid_busy", sess) + + # Status update was emitted (user sees it) + status_calls = [a for a in emitted if a[0] == "status.update"] + assert len(status_calls) == 1 + + # Event was requeued (agent was busy, no turn triggered) + assert not process_registry.completion_queue.empty() + requeued = process_registry.completion_queue.get_nowait() + assert requeued["session_id"] == "proc_busy_test" + finally: + server._sessions.pop("sid_busy", None) + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() diff --git a/tests/tools/test_process_registry.py 
b/tests/tools/test_process_registry.py index f438b637e28..46c29bb9d09 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -865,3 +865,138 @@ class TestProcessToolHandler: from tools.process_registry import _handle_process result = json.loads(_handle_process({"action": "unknown_action"})) assert "error" in result + + +# ========================================================================= +# format_process_notification + drain_notifications (shared helpers) +# ========================================================================= + +from tools.process_registry import format_process_notification + + +def test_format_completion_event(): + evt = { + "type": "completion", + "session_id": "proc_abc", + "command": "sleep 5", + "exit_code": 0, + "output": "done", + } + result = format_process_notification(evt) + assert "[IMPORTANT: Background process proc_abc completed" in result + assert "exit code 0" in result + assert "Command: sleep 5" in result + assert "Output:\ndone]" in result + + +def test_format_watch_match_event(): + evt = { + "type": "watch_match", + "session_id": "proc_xyz", + "command": "tail -f log", + "pattern": "ERROR", + "output": "ERROR: disk full", + "suppressed": 0, + } + result = format_process_notification(evt) + assert 'watch pattern "ERROR"' in result + assert "Matched output:\nERROR: disk full" in result + + +def test_format_watch_match_with_suppressed(): + evt = { + "type": "watch_match", + "session_id": "proc_xyz", + "command": "tail -f log", + "pattern": "WARN", + "output": "WARN: low mem", + "suppressed": 3, + } + result = format_process_notification(evt) + assert "3 earlier matches were suppressed" in result + + +def test_format_watch_disabled_event(): + evt = { + "type": "watch_disabled", + "message": "Watch disabled for proc_xyz: too many matches", + } + result = format_process_notification(evt) + assert "[IMPORTANT: Watch disabled for proc_xyz" in result + + +def 
test_format_returns_none_for_empty_event(): + evt = {} + result = format_process_notification(evt) + assert result is not None + assert "unknown" in result + + +def test_drain_notifications_returns_pending_events(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_drain1", + "command": "echo hi", + "exit_code": 0, + "output": "hi", + }) + process_registry.completion_queue.put({ + "type": "watch_match", + "session_id": "proc_drain2", + "command": "tail -f x", + "pattern": "ERR", + "output": "ERR found", + "suppressed": 0, + }) + + try: + results = process_registry.drain_notifications() + assert len(results) == 2 + assert results[0][0]["session_id"] == "proc_drain1" + assert "proc_drain1 completed" in results[0][1] + assert results[1][0]["session_id"] == "proc_drain2" + assert "watch pattern" in results[1][1] + finally: + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_drain1") + process_registry._completion_consumed.discard("proc_drain2") + + +def test_drain_notifications_skips_consumed(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + process_registry._completion_consumed.add("proc_consumed") + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_consumed", + "command": "echo done", + "exit_code": 0, + "output": "done", + }) + + try: + results = process_registry.drain_notifications() + assert len(results) == 0 + finally: + process_registry._completion_consumed.discard("proc_consumed") + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def 
test_drain_notifications_empty_queue(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + results = process_registry.drain_notifications() + assert results == [] diff --git a/tools/process_registry.py b/tools/process_registry.py index 405abc04a3c..184939adf75 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -826,6 +826,26 @@ class ProcessRegistry: """Check if a completion notification was already consumed via wait/poll/log.""" return session_id in self._completion_consumed + def drain_notifications(self) -> "list[tuple[dict, str]]": + """Pop all pending notification events and return formatted pairs. + + Returns a list of (raw_event, formatted_text) tuples. + Skips completion events that were already consumed via wait/poll/log. + """ + results = [] + while not self.completion_queue.empty(): + try: + evt = self.completion_queue.get_nowait() + except Exception: + break + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and self.is_completion_consumed(_evt_sid): + continue + text = format_process_notification(evt) + if text: + results.append((evt, text)) + return results + def get(self, session_id: str) -> Optional[ProcessSession]: """Get a session by ID (running or finished).""" with self._lock: @@ -1388,6 +1408,44 @@ class ProcessRegistry: process_registry = ProcessRegistry() +def format_process_notification(evt: dict) -> "str | None": + """Format a process notification event into a [IMPORTANT: ...] message. + + Handles completion events (notify_on_complete), watch pattern matches, + and watch disabled events from the unified completion_queue. 
+ """ + evt_type = evt.get("type", "completion") + _sid = evt.get("session_id", "unknown") + _cmd = evt.get("command", "unknown") + + if evt_type == "watch_disabled": + return f"[IMPORTANT: {evt.get('message', '')}]" + + if evt_type == "watch_match": + _pat = evt.get("pattern", "?") + _out = evt.get("output", "") + _sup = evt.get("suppressed", 0) + text = ( + f"[IMPORTANT: Background process {_sid} matched " + f"watch pattern \"{_pat}\".\n" + f"Command: {_cmd}\n" + f"Matched output:\n{_out}" + ) + if _sup: + text += f"\n({_sup} earlier matches were suppressed by rate limit)" + text += "]" + return text + + _exit = evt.get("exit_code", "?") + _out = evt.get("output", "") + return ( + f"[IMPORTANT: Background process {_sid} completed " + f"(exit code {_exit}).\n" + f"Command: {_cmd}\n" + f"Output:\n{_out}]" + ) + + # --------------------------------------------------------------------------- # Registry -- the "process" tool schema + handler # --------------------------------------------------------------------------- diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 230387ce23b..4a9bc2b6590 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -287,6 +287,9 @@ def _finalize_session(session: dict | None, end_reason: str = "tui_close") -> No if not session or session.get("_finalized"): return session["_finalized"] = True + stop_event = session.get("_notif_stop") + if stop_event is not None: + stop_event.set() agent = session.get("agent") lock = session.get("history_lock") @@ -579,6 +582,7 @@ def _start_agent_build(sid: str, session: dict) -> None: pass _wire_callbacks(sid) + _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid]) _notify_session_boundary("on_session_reset", key) info = _session_info(agent) @@ -1955,6 +1959,7 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80): # session startup resilient). 
pass _wire_callbacks(sid) + _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid]) _notify_session_boundary("on_session_reset", key) _emit("session.info", sid, _session_info(agent)) @@ -3027,6 +3032,105 @@ def _(rid, params: dict) -> dict: return _ok(rid, {"status": "streaming"}) +def _notification_poller_loop( + stop_event: threading.Event, sid: str, session: dict +) -> None: + """Poll completion_queue and dispatch notifications autonomously. + + Runs in a daemon thread started by _init_session(). Emits a + status.update (kind=process) for user visibility, then chains an + agent turn via _run_prompt_submit if the session is idle. + + NOTE: The completion_queue is global (one per process). If multiple + TUI sessions coexist, whichever poller wakes first grabs the event, + even if the process was started by a different session. This matches + CLI/gateway behavior (single session per process). + """ + from tools.process_registry import process_registry, format_process_notification + + while not stop_event.is_set() and not session.get("_finalized"): + try: + evt = process_registry.completion_queue.get(timeout=0.5) + except Exception: + continue + + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): + continue + + text = format_process_notification(evt) + if not text: + continue + + _emit("status.update", sid, {"kind": "process", "text": text}) + + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(evt) + continue + session["running"] = True + + rid = f"__notif__{int(time.time() * 1000)}" + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, text) + except Exception as exc: + print( + f"[tui_gateway] notification poller dispatch failed: " + f"{type(exc).__name__}: {exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + + # Drain any remaining events after stop 
signal (process all pending + # before exiting so nothing is lost on shutdown). + while not process_registry.completion_queue.empty(): + try: + evt = process_registry.completion_queue.get_nowait() + except Exception: + break + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): + continue + text = format_process_notification(evt) + if not text: + continue + + _emit("status.update", sid, {"kind": "process", "text": text}) + + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(evt) + break + session["running"] = True + + rid = f"__notif__{int(time.time() * 1000)}" + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, text) + except Exception as exc: + print( + f"[tui_gateway] notification poller dispatch failed: " + f"{type(exc).__name__}: {exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + + +def _start_notification_poller(sid: str, session: dict) -> threading.Event: + """Start the background notification poller for a TUI session.""" + stop = threading.Event() + t = threading.Thread( + target=_notification_poller_loop, + args=(stop, sid, session), + daemon=True, + ) + t.start() + return stop + + def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: with session["history_lock"]: history = list(session["history"]) @@ -3385,6 +3489,36 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: with session["history_lock"]: session["running"] = False + # Drain completion notifications that arrived during this turn. + # The background poller handles between-turn delivery; this is + # the safety net for events that arrived mid-turn. 
+ try: + from tools.process_registry import process_registry + + for _evt, synth in process_registry.drain_notifications(): + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(_evt) + break + session["running"] = True + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, synth) + except Exception as _n_exc: + print( + f"[tui_gateway] completion notification dispatch failed: " + f"{type(_n_exc).__name__}: {_n_exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + except Exception as _drain_exc: + print( + f"[tui_gateway] completion queue drain failed: " + f"{type(_drain_exc).__name__}: {_drain_exc}", + file=sys.stderr, + ) + threading.Thread(target=run, daemon=True).start() From 9fb40e6a3d6338b6a6a616010de7a16672148924 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Fri, 15 May 2026 07:41:50 -0700 Subject: [PATCH 042/218] fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly (#26011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly The composer's fast-echo path (canFastAppend / canFastBackspace) writes characters straight to stdout to skip an Ink re-render on the hot typing path. The previous guard only checked 'stringWidth(text) === text.length', which lets a lot of non-ASCII through: - Vietnamese precomposed letters (ề, ắ, ờ, ự, ...) report width 1 and length 1, but a Vietnamese Telex / IME stack produces them across multiple keystrokes; the intermediate composition state must be drawn by Ink so the rendered cell, the stored value, and the cursor column stay in lockstep when the final commit replaces the preview. - NFD combining marks (U+0300..U+036F) are zero-width but length 1, so even a passing equality lets them slip and silently desync the cell column. 
- CJK/East-Asian wide and emoji were rejected only because their length differs, but the boundary was shape-shaped, not intent-shaped. User-visible bug from the original report: Example: eê noiói nge neène -> the bypass committed the IME preview char before the diacritic replaced it, leaving doubled letters on screen. Fix: gate fast-echo on pure printable ASCII (0x20-0x7e). The performance-critical English typing path is unchanged; everything else goes through the normal Ink render path so layout stays accurate. Also extracts the shape preconditions as pure exported helpers (canFastAppendShape / canFastBackspaceShape) so the regression matrix is testable without spinning up a TextInput. Tests: ui-tui/src/__tests__/textInputFastEcho.test.ts adds 20 cases covering ASCII still works, Vietnamese precomposed + NFD, CJK, emoji, NBSP / Latin-1, ANSI / control bytes, multi-line, and end-of-line preconditions. Verified RED on the previous guard (11 of 20 fail) and GREEN on the new guard. Refs: #5221, #7443, #17602, #17603 (similar wide-char rendering bugs). * docs(tui): clarify Vietnamese char terminology in regression comment Address Copilot review: 'single byte width' implied UTF-8 byte semantics, but the relevant property is JS code units (`text.length === 1`) and display width (`stringWidth === 1`). Reworded to match.
--- .../src/__tests__/textInputFastEcho.test.ts | 136 ++++++++++++++++++ ui-tui/src/components/textInput.tsx | 101 ++++++++++--- 2 files changed, 218 insertions(+), 19 deletions(-) create mode 100644 ui-tui/src/__tests__/textInputFastEcho.test.ts diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts new file mode 100644 index 00000000000..7f246f19f21 --- /dev/null +++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from 'vitest' + +import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js' + +// The fast-echo path bypasses Ink and writes characters directly to stdout +// for the common case of typing plain English at the end of the line. These +// tests pin the shape preconditions that make that bypass safe. +// +// Regression intent: any non-ASCII text — Vietnamese precomposed letters +// (one grapheme, `text.length === 1`, `stringWidth === 1`, but produced +// via IME composition across multiple keystrokes), combining marks +// (zero width), CJK (double width), emoji (variable width), or anything +// that could be produced by an in-flight IME composition — must NOT +// take the bypass. 
Closes: +// - "TUI is experiencing font errors when using Unicode to type Vietnamese" +// - #5221 TUI input box renders incorrectly for CJK / East-Asian wide +// - #7443 CLI TUI renders and deletes Chinese characters incorrectly +// - #17602 / #17603 Chinese text scattering / ghosting + +describe('canFastAppendShape', () => { + const COLS = 40 + + it('accepts plain ASCII appended at end of single-line input', () => { + expect(canFastAppendShape('hello', 5, 'x', COLS, 5)).toBe(true) + expect(canFastAppendShape('hello', 5, ' world', COLS, 5)).toBe(true) + }) + + it('rejects when cursor is not at end of line', () => { + expect(canFastAppendShape('hello', 3, 'x', COLS, 5)).toBe(false) + }) + + it('rejects when current is empty (placeholder render path needed)', () => { + expect(canFastAppendShape('', 0, 'x', COLS, 0)).toBe(false) + }) + + it('rejects when current contains a newline (multi-line layout)', () => { + expect(canFastAppendShape('hi\nthere', 8, 'x', COLS, 5)).toBe(false) + }) + + it('rejects when appending would hit the wrap column', () => { + // Reaching cols on append must trigger a wrap, which the bypass + // cannot draw. Stay strictly below cols. + expect(canFastAppendShape('hello', 5, 'x', 6, 5)).toBe(false) + }) + + // -- Regression coverage: Vietnamese / combining marks / IME -- + + it('rejects Vietnamese precomposed letter ề (U+1EC1) — IME composition path', () => { + // 'ề' is one grapheme, length 1, width 1, but Vietnamese Telex/IME + // produces it via a multi-key composition. Fast-echo would commit the + // intermediate state to stdout and desync once the final commit + // arrives. 
+ expect(canFastAppendShape('hello', 5, 'ề', COLS, 5)).toBe(false) + }) + + it('rejects Vietnamese tone marks ă, ơ, ư (Latin-Extended-A/B)', () => { + for (const ch of ['ă', 'ắ', 'ơ', 'ờ', 'ư', 'ự']) { + expect(canFastAppendShape('hello', 5, ch, COLS, 5)).toBe(false) + } + }) + + it('rejects NFD combining marks (U+0300 grave, U+0301 acute, U+0302 circumflex)', () => { + // Decomposed Vietnamese: 'e' + combining circumflex + combining grave + // = 'ề'. Each combining mark is zero-width but length 1; without the + // ASCII guard the second/third keypress would be fast-echoed and + // desync the cell column. + expect(canFastAppendShape('hello', 5, '\u0300', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\u0301', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\u0302', COLS, 5)).toBe(false) + }) + + it('rejects CJK (East-Asian wide) characters', () => { + expect(canFastAppendShape('hello', 5, '你', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '日本', COLS, 5)).toBe(false) + }) + + it('rejects emoji', () => { + expect(canFastAppendShape('hello', 5, '🙂', COLS, 5)).toBe(false) + }) + + it('rejects ANSI-bearing or control text', () => { + expect(canFastAppendShape('hello', 5, '\x1b[31m', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\t', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\x7f', COLS, 5)).toBe(false) + }) + + it('rejects NBSP and Latin-1 letters that would change the line shape', () => { + expect(canFastAppendShape('hello', 5, '\u00a0', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, 'é', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, 'ñ', COLS, 5)).toBe(false) + }) +}) + +describe('canFastBackspaceShape', () => { + it('accepts deleting the last ASCII char', () => { + expect(canFastBackspaceShape('hello', 5)).toBe(true) + }) + + it('rejects when cursor is not at end', () => { + expect(canFastBackspaceShape('hello', 3)).toBe(false) + }) + + it('rejects when 
there is nothing to delete', () => { + expect(canFastBackspaceShape('', 0)).toBe(false) + expect(canFastBackspaceShape('hello', 0)).toBe(false) + }) + + it('rejects when value contains a newline', () => { + expect(canFastBackspaceShape('hi\nthere', 8)).toBe(false) + }) + + it('rejects deleting Vietnamese precomposed letter ề', () => { + // The "\b \b" shortcut clears one terminal cell; that's fine for a + // 1-cell ASCII char but if the previous grapheme is a Vietnamese + // letter that the IME may still be holding open, we want Ink to + // re-render so composition state stays consistent. + expect(canFastBackspaceShape('helloề', 'helloề'.length)).toBe(false) + }) + + it('rejects deleting a CJK character (2 cells)', () => { + expect(canFastBackspaceShape('hi你', 'hi你'.length)).toBe(false) + }) + + it('rejects deleting a NFD-composed grapheme with combining marks', () => { + // 'e' + U+0302 (circumflex) + U+0300 (grave) — final grapheme is one + // cluster but the previous-grapheme slice is multi-codepoint. Width + // is 1 but the bypass would be unsafe because the rendered cell + // already contained the combined glyph. + const s = 'hello' + 'e\u0302\u0300' + expect(canFastBackspaceShape(s, s.length)).toBe(false) + }) + + it('rejects deleting an emoji', () => { + expect(canFastBackspaceShape('hi🙂', 'hi🙂'.length)).toBe(false) + }) +}) diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx index 0c63ceb93c8..91e109fa366 100644 --- a/ui-tui/src/components/textInput.tsx +++ b/ui-tui/src/components/textInput.tsx @@ -179,6 +179,84 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number { export { offsetFromPosition } +const ASCII_PRINTABLE_RE = /^[\x20-\x7e]+$/ + +/** + * Pure shape-only precondition for the fast-echo append path. 
+ * + * The fast-echo path bypasses Ink's renderer and writes text directly to + * stdout, so the stored value, the rendered terminal cells, and the cursor + * column must all stay in sync without any layout work. We only allow it + * when the inserted text is pure printable ASCII so that: + * + * - `text.length` matches the number of grapheme clusters (no combining + * marks, no surrogate pairs, no precomposed CJK / Latin-Extended + * letters that an IME might still be holding open as a composition), + * - terminal width is exactly 1 cell per character (no East-Asian wide, + * no zero-width, no ambiguous-width fonts), + * - input methods (Vietnamese Telex, IME, dead-keys) cannot leak + * intermediate composition bytes through the bypass before the final + * commit arrives — those always go through the normal Ink render path + * and stay layout-accurate (closes #5221, #7443, #17602/#17603). + * + * We deliberately do NOT just check `stringWidth(text) === text.length`: + * Vietnamese precomposed letters like "ề" (U+1EC1) report width 1 and + * length 1 but are still produced by IME compositions and must not be + * fast-echoed. + */ +export function canFastAppendShape( + current: string, + cursor: number, + text: string, + columns: number, + currentLineWidth: number +): boolean { + if (cursor !== current.length) { + return false + } + + if (current.length === 0) { + return false + } + + if (current.includes('\n')) { + return false + } + + if (!ASCII_PRINTABLE_RE.test(text)) { + return false + } + + return currentLineWidth + text.length < Math.max(1, columns) +} + +/** + * Pure shape-only precondition for the fast-echo backspace path. + * + * Same reasoning as canFastAppendShape — only allow the direct + * "\b \b" stdout shortcut when the deleted grapheme is pure printable + * ASCII. Anything else (combining marks, IME compositions, wide chars, + * tabs, ANSI fragments) goes through the normal render path so Ink can + * recompute cell widths. 
+ */ +export function canFastBackspaceShape(current: string, cursor: number): boolean { + if (cursor !== current.length) { + return false + } + + if (cursor <= 0) { + return false + } + + if (current.includes('\n')) { + return false + } + + const removed = current.slice(prevPos(current, cursor), cursor) + + return ASCII_PRINTABLE_RE.test(removed) +} + function renderWithCursor(value: string, cursor: number) { const pos = Math.max(0, Math.min(cursor, value.length)) @@ -444,26 +522,11 @@ export function TextInput({ const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY - const canFastAppend = (current: string, cursor: number, text: string) => { - const sw = stringWidth(text) + const canFastAppend = (current: string, cursor: number, text: string) => + canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current) - return ( - canFastEchoBase() && - cursor === current.length && - current.length > 0 && - !current.includes('\n') && - sw === text.length && - lineWidthRef.current + sw < Math.max(1, columns) - ) - } - - const canFastBackspace = (current: string, cursor: number) => { - if (!canFastEchoBase() || cursor !== current.length || cursor <= 0 || current.includes('\n')) { - return false - } - - return stringWidth(current.slice(prevPos(current, cursor), cursor)) === 1 - } + const canFastBackspace = (current: string, cursor: number) => + canFastEchoBase() && canFastBackspaceShape(current, cursor) const commit = ( next: string, From b62c9979732c732480491c63a4399034f668a44f Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 16:10:38 +0100 Subject: [PATCH 043/218] feat(xai-oauth): add xAI Grok OAuth (SuperGrok Subscription) provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new authentication provider that lets SuperGrok subscribers sign in to Hermes with their xAI account via the standard OAuth 2.0 PKCE loopback flow, instead of pasting a 
raw API key from console.x.ai. Highlights ---------- * OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery, state/nonce, and a strict CORS-origin allowlist on the callback. * Authorize URL carries `plan=generic` (required for non-allowlisted loopback clients) and `referrer=hermes-agent` for best-effort attribution in xAI's OAuth server logs. * Token storage in `auth.json` with file-locked atomic writes; JWT `exp`-based expiry detection with skew; refresh-token rotation synced both ways between the singleton store and the credential pool so multi-process / multi-profile setups don't tear each other's refresh tokens. * Reactive 401 retry: on a 401 from the xAI Responses API, the agent refreshes the token, swaps it back into `self.api_key`, and retries the call once. Guarded against silent account swaps when the active key was sourced from a different (manual) pool entry. * Auxiliary tasks (curator, vision, embeddings, etc.) route through a dedicated xAI Responses-mode auxiliary client instead of falling back to OpenRouter billing. * Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen plugin) resolve credentials through a unified runtime → singleton → env-var fallback chain so xai-oauth users get them for free. * `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are wired through the standard auth-commands surface; remove cleans up the singleton loopback_pkce entry so it doesn't silently reinstate. * `hermes model` provider picker shows "xAI Grok OAuth (SuperGrok Subscription)" and the model-flow falls back to pool credentials when the singleton is missing. Hardening --------- * Discovery and refresh responses validate the returned `token_endpoint` host against the same `*.x.ai` allowlist as the authorization endpoint, blocking MITM persistence of a hostile endpoint. 
* Discovery / refresh / token-exchange `response.json()` calls are wrapped to raise typed `AuthError` on malformed bodies (captive portals, proxy error pages) instead of leaking JSONDecodeError tracebacks. * `prompt_cache_key` is routed through `extra_body` on the codex transport (sending it as a top-level kwarg trips xAI's SDK with a TypeError). * Credential-pool sync-back preserves `active_provider` so refreshing an OAuth entry doesn't silently flip the active provider out from under the running agent. Testing ------- * New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests) covers JWT expiry, OAuth URL params (plan + referrer), CORS origins, redirect URI validation, singleton↔pool sync, concurrency races, refresh error paths, runtime resolution, and malformed-JSON guards. * Extended `test_credential_pool.py`, `test_codex_transport.py`, and `test_run_agent_codex_responses.py` cover the pool sync-back, `extra_body` routing, and 401 reactive refresh paths. * 165 tests passing on this branch via `scripts/run_tests.sh`. 
--- agent/auxiliary_client.py | 72 + agent/codex_responses_adapter.py | 15 +- agent/credential_pool.py | 184 +- agent/credential_sources.py | 30 + agent/transports/codex.py | 31 +- hermes_cli/auth.py | 806 ++++++++- hermes_cli/auth_commands.py | 31 +- hermes_cli/main.py | 89 +- hermes_cli/models.py | 43 +- hermes_cli/providers.py | 10 + hermes_cli/runtime_provider.py | 23 + hermes_cli/setup.py | 116 +- hermes_cli/tools_config.py | 74 +- plugins/image_gen/xai/__init__.py | 51 +- plugins/video_gen/xai/__init__.py | 97 +- run_agent.py | 78 +- .../agent/transports/test_codex_transport.py | 43 + .../test_auth_xai_oauth_provider.py | 1605 +++++++++++++++++ tests/plugins/image_gen/test_xai_provider.py | 9 +- tests/plugins/video_gen/test_xai_plugin.py | 44 + .../test_run_agent_codex_responses.py | 205 ++- tools/transcription_tools.py | 31 +- tools/tts_tool.py | 19 +- tools/xai_http.py | 49 + website/docs/guides/xai-grok-oauth.md | 214 +++ website/docs/integrations/providers.md | 4 +- website/sidebars.ts | 1 + 27 files changed, 3843 insertions(+), 131 deletions(-) create mode 100644 tests/hermes_cli/test_auth_xai_oauth_provider.py create mode 100644 website/docs/guides/xai-grok-oauth.md diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 96ad615bf6f..cd655e70e56 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1254,6 +1254,30 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[ return api_key, base_url +def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]: + """Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients. + + Routes through ``hermes_cli.auth``'s runtime resolver so the auto-refresh + path is shared with the main agent, instead of relying on whatever raw + tokens happen to be sitting in auth.json or the credential pool. 
Returns + ``None`` if the user is not authenticated with xAI Grok OAuth (so + ``_resolve_auto`` Step 1 falls through to the next provider in the chain). + """ + try: + from hermes_cli.auth import resolve_xai_oauth_runtime_credentials + + creds = resolve_xai_oauth_runtime_credentials() + except Exception as exc: + logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. @@ -1744,6 +1768,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: return _fallback_client, model +def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]: + """Build a CodexAuxiliaryClient for an xAI Grok OAuth-authenticated session. + + xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so we + wrap a plain ``OpenAI`` client in ``CodexAuxiliaryClient`` to translate + ``chat.completions.create()`` calls into ``responses.stream()`` requests. + + The caller must pass an explicit model — pinning a default for Grok + would silently rot when xAI's allowlist drifts. Returns ``(None, None)`` + when the user has not authenticated with xAI Grok OAuth. + """ + if not model: + logger.warning( + "Auxiliary client: xai-oauth requested without a model; " + "pass model explicitly (auxiliary..model in config.yaml)." 
+ ) + return None, None + resolved = _resolve_xai_oauth_for_aux() + if resolved is None: + return None, None + api_key, base_url = resolved + logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model) + real_client = OpenAI(api_key=api_key, base_url=base_url) + return CodexAuxiliaryClient(real_client, model), model + + def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: """Build a CodexAuxiliaryClient for an explicitly-requested model. @@ -2851,6 +2901,26 @@ def resolve_provider_client( return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) + # ── xAI Grok OAuth (loopback PKCE → Responses API) ─────────────── + # Without this branch, an xai-oauth main provider falls through to the + # generic ``oauth_external`` arm below and returns ``(None, None)``, + # silently re-routing every auxiliary task (compression, web extract, + # session search, curator, etc.) to whatever Step-2 fallback the user + # has configured. Users on xAI Grok OAuth would then see surprise + # OpenRouter / Nous bills for side tasks they thought were running on + # their xAI subscription. 
+ if provider == "xai-oauth": + client, default = _build_xai_oauth_aux_client(model) + if client is None: + logger.warning( + "resolve_provider_client: xai-oauth requested but no xAI " + "OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)" + ) + return None, None + final_model = _normalize_resolved_model(model or default, provider) + return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode + else (client, final_model)) + # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ─────────── if provider == "custom": if explicit_base_url: @@ -3201,6 +3271,8 @@ def resolve_provider_client( return resolve_provider_client("nous", model, async_mode) if provider == "openai-codex": return resolve_provider_client("openai-codex", model, async_mode) + if provider == "xai-oauth": + return resolve_provider_client("xai-oauth", model, async_mode) # Other OAuth providers not directly supported logger.warning("resolve_provider_client: OAuth provider %s not " "directly supported, try 'auto'", provider) diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index ef4119ceb89..00345f054e8 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -726,7 +726,7 @@ def _preflight_codex_api_kwargs( "model", "instructions", "input", "tools", "store", "reasoning", "include", "max_output_tokens", "temperature", "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", - "extra_headers", + "extra_headers", "extra_body", } normalized: Dict[str, Any] = { "model": model, @@ -776,6 +776,19 @@ def _preflight_codex_api_kwargs( if normalized_headers: normalized["extra_headers"] = normalized_headers + extra_body = api_kwargs.get("extra_body") + if extra_body is not None: + if not isinstance(extra_body, dict): + raise ValueError("Codex Responses request 'extra_body' must be an object.") + # Pass extra_body through verbatim — used by xAI Responses to + # carry `prompt_cache_key` as 
a body-level field (the documented + # cache-routing surface on /v1/responses). The openai SDK + # serializes extra_body into the JSON body without per-field + # type checks, so it survives Responses.stream() kwarg-signature + # changes that would otherwise raise TypeError before the wire. + if extra_body: + normalized["extra_body"] = dict(extra_body) + if allow_stream: stream = api_kwargs.get("stream") if stream is not None and stream is not True: diff --git a/agent/credential_pool.py b/agent/credential_pool.py index aeda76225c8..504742145c1 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -29,6 +29,7 @@ from hermes_cli.auth import ( _resolve_zai_base_url, _save_auth_store, _save_provider_state, + _store_provider_state, read_credential_pool, write_credential_pool, ) @@ -539,6 +540,64 @@ class CredentialPool: logger.debug("Failed to sync Codex entry from auth.json: %s", exc) return entry + def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: + """Sync an xAI OAuth pool entry from auth.json if tokens differ. + + xAI OAuth refresh tokens are single-use. When another Hermes process + (or another profile sharing the same auth.json) refreshes the token, + it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under + ``_auth_store_lock``. Without this resync, our in-memory pool entry + keeps the consumed refresh_token and the next ``_refresh_entry`` call + would replay it and get a ``refresh_token_reused``-style 4xx. + + Only applies to entries seeded from the singleton (``loopback_pkce``); + manually added entries (``manual:xai_pkce``) are independent + credentials with their own refresh-token lifecycle. 
+ """ + if self.provider != "xai-oauth" or entry.source != "loopback_pkce": + return entry + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "xai-oauth") + if not isinstance(state, dict): + return entry + tokens = state.get("tokens") + if not isinstance(tokens, dict): + return entry + store_access = tokens.get("access_token", "") + store_refresh = tokens.get("refresh_token", "") + entry_access = entry.access_token or "" + entry_refresh = entry.refresh_token or "" + if store_access and ( + store_access != entry_access + or (store_refresh and store_refresh != entry_refresh) + ): + logger.debug( + "Pool entry %s: syncing xAI OAuth tokens from auth.json " + "(refreshed by another process)", + entry.id, + ) + field_updates: Dict[str, Any] = { + "access_token": store_access, + "refresh_token": store_refresh or entry.refresh_token, + "last_status": None, + "last_status_at": None, + "last_error_code": None, + "last_error_reason": None, + "last_error_message": None, + "last_error_reset_at": None, + } + if state.get("last_refresh"): + field_updates["last_refresh"] = state["last_refresh"] + updated = replace(entry, **field_updates) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc) + return entry + def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: """Sync a Nous pool entry from auth.json if tokens differ. @@ -604,9 +663,22 @@ class CredentialPool: re-seeding a consumed single-use refresh token. Applies to any OAuth provider whose singleton lives in auth.json - (currently Nous and OpenAI Codex). + (currently Nous, OpenAI Codex, and xAI Grok OAuth). + + ``set_active=False`` on every write: a pool sync-back is a + token-rotation side effect, not the user choosing a provider. 
+ Using ``_save_provider_state`` (which sets ``active_provider``) + here would mean every Nous/Codex/xAI refresh in a multi-provider + setup silently flips the ``active_provider`` flag — the next + ``hermes`` invocation that defaults to the active provider + (e.g. setup wizard, ``hermes auth status``) would land on + whatever provider happened to refresh last, not whatever the + user actually chose. """ - if entry.source != "device_code": + # Only sync entries that were seeded *from* a singleton. Manually + # added pool entries (source="manual:*") are independent credentials + # and must not write back to the singleton. + if entry.source not in {"device_code", "loopback_pkce"}: return try: with _auth_store_lock(): @@ -632,7 +704,7 @@ class CredentialPool: state[extra_key] = val if entry.inference_base_url: state["inference_base_url"] = entry.inference_base_url - _save_provider_state(auth_store, "nous", state) + _store_provider_state(auth_store, "nous", state, set_active=False) elif self.provider == "openai-codex": state = _load_provider_state(auth_store, "openai-codex") @@ -646,7 +718,21 @@ class CredentialPool: tokens["refresh_token"] = entry.refresh_token if entry.last_refresh: state["last_refresh"] = entry.last_refresh - _save_provider_state(auth_store, "openai-codex", state) + _store_provider_state(auth_store, "openai-codex", state, set_active=False) + + elif self.provider == "xai-oauth": + state = _load_provider_state(auth_store, "xai-oauth") + if not isinstance(state, dict): + return + tokens = state.get("tokens") + if not isinstance(tokens, dict): + return + tokens["access_token"] = entry.access_token + if entry.refresh_token: + tokens["refresh_token"] = entry.refresh_token + if entry.last_refresh: + state["last_refresh"] = entry.last_refresh + _store_provider_state(auth_store, "xai-oauth", state, set_active=False) else: return @@ -699,6 +785,25 @@ class CredentialPool: refresh_token=refreshed["refresh_token"], last_refresh=refreshed.get("last_refresh"), ) + 
elif self.provider == "xai-oauth": + # Adopt fresher tokens from auth.json before spending the + # refresh_token — single-use tokens consumed by another + # process (or another profile sharing the singleton) would + # otherwise trigger ``refresh_token_reused`` on the next + # POST. Only meaningful for singleton-seeded entries. + synced = self._sync_xai_oauth_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + refreshed = auth_mod.refresh_xai_oauth_pure( + entry.access_token, + entry.refresh_token, + ) + updated = replace( + entry, + access_token=refreshed["access_token"], + refresh_token=refreshed["refresh_token"], + last_refresh=refreshed.get("last_refresh"), + ) elif self.provider == "nous": synced = self._sync_nous_entry_from_auth_store(entry) if synced is not entry: @@ -777,6 +882,30 @@ class CredentialPool: # Credentials file had a valid (non-expired) token — use it directly logger.debug("Credentials file has valid token, using without refresh") return synced + # For xai-oauth: same race as nous — another process may have + # consumed the refresh token between our proactive sync and the + # HTTP call. Re-check auth.json and adopt the fresh tokens if + # they have rotated since. Only meaningful for singleton-seeded + # (loopback_pkce) entries; manual entries don't share state with + # the singleton. + if self.provider == "xai-oauth": + synced = self._sync_xai_oauth_entry_from_auth_store(entry) + if synced.refresh_token != entry.refresh_token: + logger.debug( + "xAI OAuth refresh failed but auth.json has newer tokens — adopting" + ) + updated = replace( + synced, + last_status=STATUS_OK, + last_status_at=None, + last_error_code=None, + last_error_reason=None, + last_error_message=None, + last_error_reset_at=None, + ) + self._replace_entry(synced, updated) + self._persist() + return updated # For nous: another process may have consumed the refresh token # between our proactive sync and the HTTP call. 
Re-sync from # auth.json and adopt the fresh tokens if available. @@ -829,6 +958,11 @@ class CredentialPool: entry.access_token, CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, ) + if self.provider == "xai-oauth": + return auth_mod._xai_access_token_is_expiring( + entry.access_token, + auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + ) if self.provider == "nous": # Nous refresh/mint can require network access and should happen when # runtime credentials are actually resolved, not merely when the pool @@ -883,6 +1017,17 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For xai-oauth singleton-seeded entries, identical pattern: + # an entry frozen as exhausted may simply be holding stale + # tokens that another process (or a fresh `hermes model` -> + # xAI Grok OAuth login) has since rotated in auth.json. + if (self.provider == "xai-oauth" + and entry.source == "loopback_pkce" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_xai_oauth_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: @@ -1394,6 +1539,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup }, ) + elif provider == "xai-oauth": + # When the user logs in via ``hermes model`` -> xAI Grok OAuth, + # tokens are written to the auth.json singleton + # (``providers["xai-oauth"]``). Surface them in the pool too so + # ``hermes auth list`` reflects the logged-in state and so the pool + # is the single source of truth for refresh during runtime resolution. 
+ if _is_suppressed(provider, "loopback_pkce"): + return changed, active_sources + + state = _load_provider_state(auth_store, "xai-oauth") + tokens = state.get("tokens") if isinstance(state, dict) else None + if isinstance(tokens, dict) and tokens.get("access_token"): + active_sources.add("loopback_pkce") + from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL + + base_url = DEFAULT_XAI_OAUTH_BASE_URL + changed |= _upsert_entry( + entries, + provider, + "loopback_pkce", + { + "source": "loopback_pkce", + "auth_type": AUTH_TYPE_OAUTH, + "access_token": tokens.get("access_token", ""), + "refresh_token": tokens.get("refresh_token"), + "base_url": base_url, + "last_refresh": state.get("last_refresh"), + "label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"), + }, + ) + return changed, active_sources diff --git a/agent/credential_sources.py b/agent/credential_sources.py index 74204919248..ee035426023 100644 --- a/agent/credential_sources.py +++ b/agent/credential_sources.py @@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult: return result +def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult: + """xAI OAuth tokens live in auth.json providers.xai-oauth — clear them. + + Without this step, ``hermes auth remove xai-oauth `` silently undoes + itself: the central dispatcher only removes the in-memory pool entry, + leaves ``providers.xai-oauth`` in auth.json intact, and on the next + ``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the + entry from the still-present singleton — credentials reappear with no + user feedback. Clearing the singleton in step with the suppression set + by the central dispatcher makes the removal stick. + + Belt-and-braces against the manual entry path: ``hermes auth add + xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step + falls through to "unregistered → nothing to clean up" (correct — + manual entries are pool-only). 
+ """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + result.hints.append( + "Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed." + ) + return result + + def _remove_codex_device_code(provider: str, removed) -> RemovalResult: """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. @@ -397,6 +422,11 @@ def _register_all_sources() -> None: remove_fn=_remove_codex_device_code, description="auth.json providers.openai-codex + ~/.codex/auth.json", )) + register(RemovalStep( + provider="xai-oauth", source_id="loopback_pkce", + remove_fn=_remove_xai_oauth_loopback_pkce, + description="auth.json providers.xai-oauth", + )) register(RemovalStep( provider="qwen-oauth", source_id="qwen-cli", remove_fn=_remove_qwen_cli, diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 6738ed3220c..46169e971ba 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -89,18 +89,25 @@ class ResponsesApiTransport(ProviderTransport): _effort_clamp = {"minimal": "low"} reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + response_tools = _responses_tools(tools) kwargs = { "model": model, "instructions": instructions, "input": _chat_messages_to_responses_input(payload_messages), - "tools": _responses_tools(tools), - "tool_choice": "auto", - "parallel_tool_calls": True, + "tools": response_tools, "store": False, } + if response_tools: + kwargs["tool_choice"] = "auto" + kwargs["parallel_tool_calls"] = True session_id = params.get("session_id") - if not is_github_responses and session_id: + # xAI's Responses API uses `prompt_cache_key` (body-level) as the + # cache-routing key, not a top-level kwarg — the body-field + # injection below survives openai SDK builds whose + # Responses.stream() signature drops the kwarg. Everything else + # that ISN'T github/xAI keeps using the typed kwarg. 
+ if not is_github_responses and not is_xai_responses and session_id: kwargs["prompt_cache_key"] = session_id if reasoning_enabled and is_xai_responses: @@ -165,6 +172,22 @@ class ResponsesApiTransport(ProviderTransport): merged_extra_headers["x-grok-conv-id"] = session_id kwargs["extra_headers"] = merged_extra_headers + # xAI Responses cache-routing field. Lives in the request body + # (per https://docs.x.ai/.../prompt-caching/maximizing-cache-hits), + # so we ship it via extra_body — the openai SDK serializes + # extra_body fields into the JSON body without per-field type + # validation, sidestepping the TypeError that fires on + # Responses.stream() builds whose `prompt_cache_key` kwarg has + # been dropped. Setdefault preserves a caller-supplied value + # (e.g. request_overrides.extra_body.prompt_cache_key) over + # the auto-derived session_id. + existing_extra_body = kwargs.get("extra_body") + merged_extra_body: Dict[str, Any] = {} + if isinstance(existing_extra_body, dict): + merged_extra_body.update(existing_extra_body) + merged_extra_body.setdefault("prompt_cache_key", session_id) + kwargs["extra_body"] = merged_extra_body + return kwargs def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 2dcf6a03b45..8749cd9461c 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -72,6 +72,7 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +DEFAULT_XAI_OAUTH_BASE_URL = "https://api.x.ai/v1" MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113" MINIMAX_OAUTH_SCOPE = "group_id profile model.completion" MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code" @@ -89,6 +90,14 @@ STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1" 
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +XAI_OAUTH_ISSUER = "https://auth.x.ai" +XAI_OAUTH_DISCOVERY_URL = f"{XAI_OAUTH_ISSUER}/.well-known/openid-configuration" +XAI_OAUTH_CLIENT_ID = "b1a00492-073a-47ea-816f-4c329264a828" +XAI_OAUTH_SCOPE = "openid profile email offline_access grok-cli:access api:access" +XAI_OAUTH_REDIRECT_HOST = "127.0.0.1" +XAI_OAUTH_REDIRECT_PORT = 56121 +XAI_OAUTH_REDIRECT_PATH = "/callback" +XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token" QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -162,6 +171,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { auth_type="oauth_external", inference_base_url=DEFAULT_CODEX_BASE_URL, ), + "xai-oauth": ProviderConfig( + id="xai-oauth", + name="xAI Grok OAuth (SuperGrok Subscription)", + auth_type="oauth_external", + inference_base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ), "qwen-oauth": ProviderConfig( id="qwen-oauth", name="Qwen OAuth", @@ -1364,6 +1379,8 @@ def resolve_provider( "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai", "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini", "x-ai": "xai", "x.ai": "xai", "grok": "xai", + "xai-oauth": "xai-oauth", "x-ai-oauth": "xai-oauth", + "grok-oauth": "xai-oauth", "xai-grok-oauth": "xai-oauth", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", "step": "stepfun", "stepfun-coding-plan": "stepfun", @@ -1907,6 +1924,16 @@ def _spotify_code_challenge(code_verifier: str) -> str: return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=") +def _oauth_pkce_code_verifier(length: int = 64) -> str: + raw = base64.urlsafe_b64encode(os.urandom(length)).decode("ascii") + return raw.rstrip("=")[:128] + + 
+def _oauth_pkce_code_challenge(code_verifier: str) -> str: + digest = hashlib.sha256(code_verifier.encode("utf-8")).digest() + return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=") + + def _spotify_build_authorize_url( *, client_id: str, @@ -2029,6 +2056,158 @@ def _spotify_wait_for_callback( ) +def _xai_validate_loopback_redirect_uri(redirect_uri: str) -> tuple[str, int, str]: + parsed = urlparse(redirect_uri) + if parsed.scheme != "http": + raise AuthError( + "xAI OAuth redirect_uri must use http://127.0.0.1.", + provider="xai-oauth", + code="xai_redirect_invalid", + ) + host = parsed.hostname or "" + if host != XAI_OAUTH_REDIRECT_HOST: + raise AuthError( + "xAI OAuth redirect_uri must point to 127.0.0.1.", + provider="xai-oauth", + code="xai_redirect_invalid", + ) + if not parsed.port: + raise AuthError( + "xAI OAuth redirect_uri must include an explicit localhost port.", + provider="xai-oauth", + code="xai_redirect_invalid", + ) + return host, parsed.port, parsed.path or "/" + + +def _xai_callback_cors_origin(origin: Optional[str]) -> str: + allowed = { + "https://accounts.x.ai", + "https://auth.x.ai", + "https://accounts.mouseion.dev", + "http://localhost:20000", + "http://127.0.0.1:20000", + } + return origin if origin in allowed else "" + + +def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequestHandler], dict[str, Any]]: + result: dict[str, Any] = { + "code": None, + "state": None, + "error": None, + "error_description": None, + } + + class _XAICallbackHandler(BaseHTTPRequestHandler): + def _maybe_write_cors_headers(self) -> None: + origin = self.headers.get("Origin") + allow_origin = _xai_callback_cors_origin(origin) + if allow_origin: + self.send_header("Access-Control-Allow-Origin", allow_origin) + self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Content-Type") + self.send_header("Access-Control-Allow-Private-Network", "true") + 
self.send_header("Vary", "Origin") + + def do_OPTIONS(self) -> None: # noqa: N802 + self.send_response(204) + self._maybe_write_cors_headers() + self.end_headers() + + def do_GET(self) -> None: # noqa: N802 + parsed = urlparse(self.path) + if parsed.path != expected_path: + self.send_response(404) + self.end_headers() + self.wfile.write(b"Not found.") + return + + params = parse_qs(parsed.query) + result["code"] = params.get("code", [None])[0] + result["state"] = params.get("state", [None])[0] + result["error"] = params.get("error", [None])[0] + result["error_description"] = params.get("error_description", [None])[0] + + self.send_response(200) + self._maybe_write_cors_headers() + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + if result["error"]: + body = "

xAI authorization failed.

You can close this tab." + else: + body = "

xAI authorization received.

You can close this tab." + self.wfile.write(body.encode("utf-8")) + + def log_message(self, format: str, *args: Any) -> None: # noqa: A003 + return + + return _XAICallbackHandler, result + + +def _xai_start_callback_server( + preferred_port: int = XAI_OAUTH_REDIRECT_PORT, +) -> tuple[HTTPServer, threading.Thread, dict[str, Any], str]: + host = XAI_OAUTH_REDIRECT_HOST + expected_path = XAI_OAUTH_REDIRECT_PATH + handler_cls, result = _make_xai_callback_handler(expected_path) + + class _ReuseHTTPServer(HTTPServer): + allow_reuse_address = True + + ports_to_try = [preferred_port] + if preferred_port != 0: + ports_to_try.append(0) + server = None + last_error: Optional[OSError] = None + for port in ports_to_try: + try: + server = _ReuseHTTPServer((host, port), handler_cls) + break + except OSError as exc: + last_error = exc + if server is None: + raise AuthError( + f"Could not bind xAI callback server on {host}:{preferred_port}: {last_error}", + provider="xai-oauth", + code="xai_callback_bind_failed", + ) from last_error + + actual_port = int(server.server_address[1]) + redirect_uri = f"http://{host}:{actual_port}{expected_path}" + thread = threading.Thread( + target=server.serve_forever, + kwargs={"poll_interval": 0.1}, + daemon=True, + ) + thread.start() + return server, thread, result, redirect_uri + + +def _xai_wait_for_callback( + server: HTTPServer, + thread: threading.Thread, + result: dict[str, Any], + *, + timeout_seconds: float = 180.0, +) -> dict[str, Any]: + deadline = time.monotonic() + max(5.0, timeout_seconds) + try: + while time.monotonic() < deadline: + if result["code"] or result["error"]: + return result + time.sleep(0.1) + finally: + server.shutdown() + server.server_close() + thread.join(timeout=1.0) + raise AuthError( + "xAI authorization timed out waiting for the local callback.", + provider="xai-oauth", + code="xai_callback_timeout", + ) + + def _spotify_token_payload_to_state( token_payload: Dict[str, Any], *, @@ -2680,6 +2859,348 @@ def 
resolve_codex_runtime_credentials( } +# ============================================================================= +# xAI Grok OAuth — tokens stored in ~/.hermes/auth.json +# ============================================================================= + +def _read_xai_oauth_tokens(*, _lock: bool = True) -> Dict[str, Any]: + if _lock: + with _auth_store_lock(): + auth_store = _load_auth_store() + else: + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "xai-oauth") + if not state: + raise AuthError( + "No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok Subscription) in `hermes model`.", + provider="xai-oauth", + code="xai_auth_missing", + relogin_required=True, + ) + tokens = state.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "xAI OAuth state is missing tokens. Re-authenticate with `hermes model`.", + provider="xai-oauth", + code="xai_auth_invalid_shape", + relogin_required=True, + ) + access_token = str(tokens.get("access_token", "") or "").strip() + refresh_token = str(tokens.get("refresh_token", "") or "").strip() + if not access_token: + raise AuthError( + "xAI OAuth state is missing access_token. Re-authenticate with `hermes model`.", + provider="xai-oauth", + code="xai_auth_missing_access_token", + relogin_required=True, + ) + if not refresh_token: + raise AuthError( + "xAI OAuth state is missing refresh_token. 
Re-authenticate with `hermes model`.", + provider="xai-oauth", + code="xai_auth_missing_refresh_token", + relogin_required=True, + ) + return { + "tokens": tokens, + "last_refresh": state.get("last_refresh"), + "discovery": state.get("discovery") or {}, + "redirect_uri": state.get("redirect_uri"), + } + + +def _save_xai_oauth_tokens( + tokens: Dict[str, Any], + *, + discovery: Optional[Dict[str, Any]] = None, + redirect_uri: str = "", + last_refresh: Optional[str] = None, +) -> None: + if last_refresh is None: + last_refresh = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "xai-oauth") or {} + state["tokens"] = tokens + state["last_refresh"] = last_refresh + state["auth_mode"] = "oauth_pkce" + if discovery: + state["discovery"] = discovery + if redirect_uri: + state["redirect_uri"] = redirect_uri + _save_provider_state(auth_store, "xai-oauth", state) + _save_auth_store(auth_store) + + +def _xai_access_token_is_expiring(access_token: str, skew_seconds: int = 0) -> bool: + if not isinstance(access_token, str) or "." not in access_token: + return False + try: + parts = access_token.split(".") + if len(parts) < 2: + return False + payload_b64 = parts[1] + payload_b64 += "=" * (-len(payload_b64) % 4) + payload = json.loads(base64.urlsafe_b64decode(payload_b64.encode("ascii")).decode("utf-8")) + exp = payload.get("exp") + if not isinstance(exp, (int, float)): + return False + return float(exp) <= (time.time() + max(0, int(skew_seconds))) + except Exception: + return False + + +def _xai_validate_oauth_endpoint(url: str, *, field: str) -> str: + """Refuse any OIDC discovery endpoint that isn't HTTPS on the xAI origin. + + The OIDC discovery response is a long-lived, low-frequency request whose + output is cached in ``~/.hermes/auth.json``. 
A single MITM during initial + login could substitute a malicious ``token_endpoint``; that URL would + then receive the refresh_token on every subsequent refresh — a permanent + credential leak from a one-time MITM. Validating scheme + host pins the + cached endpoint to the xAI auth origin (or a future ``*.x.ai`` subdomain + if xAI migrates) so the cache poisoning loses its persistence guarantee. + + RFC 8414 §2 requires the issuer to be ``https://`` and SHOULD-keeps the + token_endpoint on the same origin; we enforce both. ``x.ai`` is the + bare apex, so we accept either exact host match or any ``.x.ai`` suffix. + """ + parsed = urlparse(url) + if parsed.scheme != "https": + raise AuthError( + f"xAI OIDC discovery returned a non-HTTPS {field}: {url!r}.", + provider="xai-oauth", + code="xai_discovery_invalid", + ) + host = (parsed.hostname or "").lower() + if not host: + raise AuthError( + f"xAI OIDC discovery {field} is missing a hostname: {url!r}.", + provider="xai-oauth", + code="xai_discovery_invalid", + ) + if host != "x.ai" and not host.endswith(".x.ai"): + raise AuthError( + f"xAI OIDC discovery {field} host {host!r} is not on the xAI origin " + f"(expected x.ai or a *.x.ai subdomain). 
Refusing to use a cached " + f"endpoint that may have been substituted by a MITM during initial " + f"discovery; re-authenticate with `hermes model` to re-fetch.", + provider="xai-oauth", + code="xai_discovery_invalid", + ) + return url + + +def _xai_oauth_discovery(timeout_seconds: float = 15.0) -> Dict[str, str]: + try: + response = httpx.get( + XAI_OAUTH_DISCOVERY_URL, + headers={"Accept": "application/json"}, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"xAI OIDC discovery failed: {exc}", + provider="xai-oauth", + code="xai_discovery_failed", + ) from exc + if response.status_code != 200: + raise AuthError( + f"xAI OIDC discovery returned status {response.status_code}.", + provider="xai-oauth", + code="xai_discovery_failed", + ) + try: + payload = response.json() + except Exception as exc: + raise AuthError( + f"xAI OIDC discovery returned invalid JSON: {exc}", + provider="xai-oauth", + code="xai_discovery_invalid_json", + ) from exc + if not isinstance(payload, dict): + raise AuthError( + "xAI OIDC discovery response was not a JSON object.", + provider="xai-oauth", + code="xai_discovery_incomplete", + ) + authorization_endpoint = str(payload.get("authorization_endpoint", "") or "").strip() + token_endpoint = str(payload.get("token_endpoint", "") or "").strip() + if not authorization_endpoint or not token_endpoint: + raise AuthError( + "xAI OIDC discovery response was missing required endpoints.", + provider="xai-oauth", + code="xai_discovery_incomplete", + ) + _xai_validate_oauth_endpoint(authorization_endpoint, field="authorization_endpoint") + _xai_validate_oauth_endpoint(token_endpoint, field="token_endpoint") + return { + "authorization_endpoint": authorization_endpoint, + "token_endpoint": token_endpoint, + } + + +def refresh_xai_oauth_pure( + access_token: str, + refresh_token: str, + *, + token_endpoint: str = "", + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + del access_token + if not 
isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "xAI OAuth is missing refresh_token. Re-authenticate with `hermes model`.", + provider="xai-oauth", + code="xai_auth_missing_refresh_token", + relogin_required=True, + ) + endpoint = token_endpoint.strip() or _xai_oauth_discovery(timeout_seconds)["token_endpoint"] + # Re-validate cached endpoints on the refresh hot path: an auth.json + # written by an older Hermes (or hand-edited) may carry a non-xAI + # token_endpoint that would receive every future refresh_token in + # plaintext if we trusted it blindly. Cheap suffix check; fast-fail + # with a clear error so the user can re-run `hermes model` to refetch. + _xai_validate_oauth_endpoint(endpoint, field="token_endpoint") + timeout = httpx.Timeout(max(5.0, float(timeout_seconds))) + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client: + response = client.post( + endpoint, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "client_id": XAI_OAUTH_CLIENT_ID, + "refresh_token": refresh_token, + }, + ) + if response.status_code != 200: + detail = response.text.strip() + raise AuthError( + "xAI token refresh failed." 
+ + (f" Response: {detail}" if detail else ""), + provider="xai-oauth", + code="xai_refresh_failed", + relogin_required=(response.status_code in {400, 401, 403}), + ) + try: + payload = response.json() + except Exception as exc: + raise AuthError( + f"xAI token refresh returned invalid JSON: {exc}", + provider="xai-oauth", + code="xai_refresh_invalid_json", + ) from exc + if not isinstance(payload, dict): + raise AuthError( + "xAI token refresh response was not a JSON object.", + provider="xai-oauth", + code="xai_refresh_invalid_response", + relogin_required=True, + ) + refreshed_access = str(payload.get("access_token", "") or "").strip() + if not refreshed_access: + raise AuthError( + "xAI token refresh response was missing access_token.", + provider="xai-oauth", + code="xai_refresh_missing_access_token", + relogin_required=True, + ) + updated = { + "access_token": refreshed_access, + "refresh_token": str(payload.get("refresh_token") or refresh_token).strip(), + "id_token": str(payload.get("id_token") or "").strip(), + "expires_in": payload.get("expires_in"), + "token_type": str(payload.get("token_type") or "Bearer").strip() or "Bearer", + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + return updated + + +def _refresh_xai_oauth_tokens( + tokens: Dict[str, Any], + *, + token_endpoint: str, + redirect_uri: str = "", + timeout_seconds: float, +) -> Dict[str, Any]: + refreshed = refresh_xai_oauth_pure( + str(tokens.get("access_token", "") or ""), + str(tokens.get("refresh_token", "") or ""), + token_endpoint=token_endpoint, + timeout_seconds=timeout_seconds, + ) + updated_tokens = dict(tokens) + updated_tokens["access_token"] = refreshed["access_token"] + updated_tokens["refresh_token"] = refreshed["refresh_token"] + if refreshed.get("id_token"): + updated_tokens["id_token"] = refreshed["id_token"] + if refreshed.get("expires_in") is not None: + updated_tokens["expires_in"] = refreshed["expires_in"] + if 
refreshed.get("token_type"): + updated_tokens["token_type"] = refreshed["token_type"] + _save_xai_oauth_tokens( + updated_tokens, + discovery={"token_endpoint": token_endpoint}, + redirect_uri=redirect_uri, + last_refresh=refreshed["last_refresh"], + ) + return updated_tokens + + +def resolve_xai_oauth_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + data = _read_xai_oauth_tokens() + tokens = dict(data["tokens"]) + access_token = str(tokens.get("access_token", "") or "").strip() + refresh_timeout_seconds = float(os.getenv("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", "20")) + discovery = dict(data.get("discovery") or {}) + token_endpoint = str(discovery.get("token_endpoint", "") or "").strip() + redirect_uri = str(data.get("redirect_uri", "") or "").strip() + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _xai_access_token_is_expiring(access_token, refresh_skew_seconds) + if should_refresh: + with _auth_store_lock(timeout_seconds=max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)): + data = _read_xai_oauth_tokens(_lock=False) + tokens = dict(data["tokens"]) + access_token = str(tokens.get("access_token", "") or "").strip() + discovery = dict(data.get("discovery") or {}) + token_endpoint = str(discovery.get("token_endpoint", "") or "").strip() + redirect_uri = str(data.get("redirect_uri", "") or "").strip() + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _xai_access_token_is_expiring(access_token, refresh_skew_seconds) + if should_refresh: + if not token_endpoint: + token_endpoint = _xai_oauth_discovery(refresh_timeout_seconds)["token_endpoint"] + tokens = _refresh_xai_oauth_tokens( + tokens, + token_endpoint=token_endpoint, + redirect_uri=redirect_uri, + timeout_seconds=refresh_timeout_seconds, + ) + 
access_token = str(tokens.get("access_token", "") or "").strip() + + base_url = ( + os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/") + or os.getenv("XAI_BASE_URL", "").strip().rstrip("/") + or DEFAULT_XAI_OAUTH_BASE_URL + ) + return { + "provider": "xai-oauth", + "base_url": base_url, + "api_key": access_token, + "source": "hermes-auth-store", + "last_refresh": data.get("last_refresh"), + "auth_mode": "oauth_pkce", + } + + # ============================================================================= # TLS verification helper # ============================================================================= @@ -4030,6 +4551,48 @@ def get_codex_auth_status() -> Dict[str, Any]: } +def get_xai_oauth_auth_status() -> Dict[str, Any]: + try: + from agent.credential_pool import load_pool + + pool = load_pool("xai-oauth") + if pool and pool.has_credentials(): + entry = pool.select() + if entry is not None: + api_key = ( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "access_token", "") + ) + if api_key and not _xai_access_token_is_expiring(api_key, 0): + return { + "logged_in": True, + "auth_store": str(_auth_file_path()), + "last_refresh": getattr(entry, "last_refresh", None), + "auth_mode": "oauth_pkce", + "source": f"pool:{getattr(entry, 'label', 'unknown')}", + "api_key": api_key, + } + except Exception: + pass + + try: + creds = resolve_xai_oauth_runtime_credentials() + return { + "logged_in": True, + "auth_store": str(_auth_file_path()), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + "api_key": creds.get("api_key"), + } + except AuthError as exc: + return { + "logged_in": False, + "auth_store": str(_auth_file_path()), + "error": str(exc), + } + + def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]: """Status snapshot for API-key providers (z.ai, Kimi, MiniMax).""" pconfig = PROVIDER_REGISTRY.get(provider_id) @@ -4100,6 +4663,8 @@ def 
get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return get_nous_auth_status() if target == "openai-codex": return get_codex_auth_status() + if target == "xai-oauth": + return get_xai_oauth_auth_status() if target == "qwen-oauth": return get_qwen_auth_status() if target == "google-gemini-cli": @@ -4320,7 +4885,7 @@ def _logout_default_provider_from_config() -> Optional[str]: "No provider is currently logged in" and never reset model.provider. """ provider = _get_config_provider() - if provider in {"nous", "openai-codex"}: + if provider in {"nous", "openai-codex", "xai-oauth"}: return provider return None @@ -4619,6 +5184,245 @@ def _login_openai_codex( print(f" Config updated: {config_path} (model.provider=openai-codex)") +def _login_xai_oauth( + args, + pconfig: ProviderConfig, + *, + force_new_login: bool = False, +) -> None: + del pconfig + + if not force_new_login: + try: + existing = resolve_xai_oauth_runtime_credentials() + api_key = existing.get("api_key", "") + if isinstance(api_key, str) and api_key and not _xai_access_token_is_expiring(api_key, 60): + print("Existing xAI OAuth credentials found in Hermes auth store.") + try: + reuse = input("Use existing credentials? 
[Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + config_path = _update_config_for_provider( + "xai-oauth", + existing.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL), + ) + print() + print("Login successful!") + print(f" Config updated: {config_path} (model.provider=xai-oauth)") + return + except AuthError: + pass + + print() + print("Signing in to xAI Grok OAuth (SuperGrok Subscription)...") + print("(Hermes creates its own local OAuth session)") + print() + + timeout_seconds = float(getattr(args, "timeout", None) or 20.0) + open_browser = not getattr(args, "no_browser", False) + if _is_remote_session(): + open_browser = False + + creds = _xai_oauth_loopback_login(timeout_seconds=timeout_seconds, open_browser=open_browser) + _save_xai_oauth_tokens( + creds["tokens"], + discovery=creds.get("discovery"), + redirect_uri=creds.get("redirect_uri", ""), + last_refresh=creds.get("last_refresh"), + ) + config_path = _update_config_for_provider("xai-oauth", creds.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL)) + print() + print("Login successful!") + from hermes_constants import display_hermes_home as _dhh + print(f" Auth state: {_dhh()}/auth.json") + print(f" Config updated: {config_path} (model.provider=xai-oauth)") + + +def _xai_oauth_build_authorize_url( + *, + authorization_endpoint: str, + redirect_uri: str, + code_challenge: str, + state: str, + nonce: str, +) -> str: + # `plan=generic` opts the consent screen into xAI's generic OAuth plan + # tier instead of falling back to the per-account default. Without it, + # accounts.x.ai rejects loopback OAuth from non-allowlisted clients. + # `referrer=hermes-agent` lets xAI attribute Hermes-originated logins + # in their OAuth server logs (we still impersonate the upstream Grok-CLI + # client_id; this is best-effort attribution until xAI mints us our own). 
+ authorize_params = { + "response_type": "code", + "client_id": XAI_OAUTH_CLIENT_ID, + "redirect_uri": redirect_uri, + "scope": XAI_OAUTH_SCOPE, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + "state": state, + "nonce": nonce, + "plan": "generic", + "referrer": "hermes-agent", + } + return f"{authorization_endpoint}?{urlencode(authorize_params)}" + + +def _xai_oauth_loopback_login( + *, + timeout_seconds: float = 20.0, + open_browser: bool = True, +) -> Dict[str, Any]: + discovery = _xai_oauth_discovery(timeout_seconds) + authorization_endpoint = discovery["authorization_endpoint"] + token_endpoint = discovery["token_endpoint"] + + server, thread, callback_result, redirect_uri = _xai_start_callback_server() + try: + _xai_validate_loopback_redirect_uri(redirect_uri) + code_verifier = _oauth_pkce_code_verifier() + code_challenge = _oauth_pkce_code_challenge(code_verifier) + state = uuid.uuid4().hex + nonce = uuid.uuid4().hex + authorize_url = _xai_oauth_build_authorize_url( + authorization_endpoint=authorization_endpoint, + redirect_uri=redirect_uri, + code_challenge=code_challenge, + state=state, + nonce=nonce, + ) + + print("Open this URL to authorize Hermes with xAI:") + print(authorize_url) + print() + print(f"Waiting for callback on {redirect_uri}") + + if open_browser and not _is_remote_session(): + try: + opened = webbrowser.open(authorize_url) + except Exception: + opened = False + if opened: + print("Browser opened for xAI authorization.") + else: + print("Could not open the browser automatically; use the URL above.") + + callback = _xai_wait_for_callback( + server, + thread, + callback_result, + timeout_seconds=max(30.0, timeout_seconds * 9), + ) + except Exception: + try: + server.shutdown() + server.server_close() + except Exception: + pass + try: + thread.join(timeout=1.0) + except Exception: + pass + raise + + if callback.get("error"): + detail = callback.get("error_description") or callback["error"] + raise AuthError( + f"xAI 
authorization failed: {detail}", + provider="xai-oauth", + code="xai_authorization_failed", + ) + if callback.get("state") != state: + raise AuthError( + "xAI authorization failed: state mismatch.", + provider="xai-oauth", + code="xai_state_mismatch", + ) + code = str(callback.get("code") or "").strip() + if not code: + raise AuthError( + "xAI authorization failed: missing authorization code.", + provider="xai-oauth", + code="xai_code_missing", + ) + + try: + response = httpx.post( + token_endpoint, + headers={"Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json"}, + data={ + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "client_id": XAI_OAUTH_CLIENT_ID, + "code_verifier": code_verifier, + }, + timeout=max(20.0, timeout_seconds), + ) + except Exception as exc: + raise AuthError( + f"xAI token exchange failed: {exc}", + provider="xai-oauth", + code="xai_token_exchange_failed", + ) from exc + if response.status_code != 200: + detail = response.text.strip() + raise AuthError( + "xAI token exchange failed." 
+ + (f" Response: {detail}" if detail else ""), + provider="xai-oauth", + code="xai_token_exchange_failed", + ) + try: + payload = response.json() + except Exception as exc: + raise AuthError( + f"xAI token exchange returned invalid JSON: {exc}", + provider="xai-oauth", + code="xai_token_exchange_invalid", + ) from exc + if not isinstance(payload, dict): + raise AuthError( + "xAI token exchange response was not a JSON object.", + provider="xai-oauth", + code="xai_token_exchange_invalid", + ) + access_token = str(payload.get("access_token", "") or "").strip() + refresh_token = str(payload.get("refresh_token", "") or "").strip() + if not access_token: + raise AuthError( + "xAI token exchange did not return an access_token.", + provider="xai-oauth", + code="xai_token_exchange_invalid", + ) + if not refresh_token: + raise AuthError( + "xAI token exchange did not return a refresh_token.", + provider="xai-oauth", + code="xai_token_exchange_invalid", + ) + + base_url = ( + os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/") + or os.getenv("XAI_BASE_URL", "").strip().rstrip("/") + or DEFAULT_XAI_OAUTH_BASE_URL + ) + return { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + "id_token": str(payload.get("id_token", "") or "").strip(), + "expires_in": payload.get("expires_in"), + "token_type": str(payload.get("token_type") or "Bearer").strip() or "Bearer", + }, + "discovery": discovery, + "redirect_uri": redirect_uri, + "base_url": base_url, + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "source": "oauth-loopback", + } + + def _codex_device_code_login() -> Dict[str, Any]: """Run the OpenAI device code login flow and return credentials dict.""" import time as _time diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 65cb7ed1b85..10b040d8a1d 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL 
# Providers that support OAuth login in addition to API keys. -_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} +_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "xai-oauth", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} def _get_custom_provider_names() -> list: @@ -77,6 +77,8 @@ def _normalize_provider(provider: str) -> str: normalized = (provider or "").strip().lower() if normalized in {"or", "open-router"}: return "openrouter" + if normalized in {"grok-oauth", "xai-oauth", "x-ai-oauth", "xai-grok-oauth"}: + return "xai-oauth" # Check if it matches a custom provider name custom_key = _resolve_custom_provider_input(normalized) if custom_key: @@ -170,7 +172,7 @@ def auth_add_command(args) -> None: if provider.startswith(CUSTOM_POOL_PREFIX): requested_type = AUTH_TYPE_API_KEY else: - requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY + requested_type = AUTH_TYPE_OAUTH if provider in _OAUTH_CAPABLE_PROVIDERS else AUTH_TYPE_API_KEY pool = load_pool(provider) @@ -333,6 +335,31 @@ def auth_add_command(args) -> None: print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') return + if provider == "xai-oauth": + creds = auth_mod._xai_oauth_loopback_login( + timeout_seconds=getattr(args, "timeout", None) or 20.0, + open_browser=not getattr(args, "no_browser", False), + ) + label = (getattr(args, "label", None) or "").strip() or label_from_token( + creds["tokens"]["access_token"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source=f"{SOURCE_MANUAL}:xai_pkce", + access_token=creds["tokens"]["access_token"], + refresh_token=creds["tokens"].get("refresh_token"), + base_url=creds.get("base_url"), + 
last_refresh=creds.get("last_refresh"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + if provider == "google-gemini-cli": from agent.google_oauth import run_gemini_oauth_login_pure diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 833172a23b9..c7ac1100816 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1932,6 +1932,8 @@ def select_provider_and_model(args=None): _model_flow_nous(config, current_model, args=args) elif selected_provider == "openai-codex": _model_flow_openai_codex(config, current_model) + elif selected_provider == "xai-oauth": + _model_flow_xai_oauth(config, current_model) elif selected_provider == "qwen-oauth": _model_flow_qwen_oauth(config, current_model) elif selected_provider == "minimax-oauth": @@ -2813,6 +2815,87 @@ def _model_flow_openai_codex(config, current_model=""): print("No change.") +def _model_flow_xai_oauth(_config, current_model=""): + """xAI Grok OAuth (SuperGrok Subscription) provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_xai_oauth_auth_status, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + resolve_xai_oauth_runtime_credentials, + _login_xai_oauth, + DEFAULT_XAI_OAUTH_BASE_URL, + PROVIDER_REGISTRY, + ) + from hermes_cli.models import _PROVIDER_MODELS + + status = get_xai_oauth_auth_status() + if status.get("logged_in"): + print(" xAI Grok OAuth (SuperGrok Subscription) credentials: ✓") + print() + print(" 1. Use existing credentials") + print(" 2. Reauthenticate (new OAuth login)") + print(" 3. 
Cancel") + print() + try: + choice = input(" Choice [1/2/3]: ").strip() + except (KeyboardInterrupt, EOFError): + choice = "1" + + if choice == "2": + print("Starting a fresh xAI OAuth login...") + print() + try: + mock_args = argparse.Namespace() + _login_xai_oauth( + mock_args, + PROVIDER_REGISTRY["xai-oauth"], + force_new_login=True, + ) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + elif choice == "3": + return + else: + print("Not logged into xAI Grok OAuth (SuperGrok Subscription). Starting login...") + print() + try: + mock_args = argparse.Namespace() + _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + # Resolve a usable base URL. ``resolve_xai_oauth_runtime_credentials`` + # only reads from the auth.json singleton — but credentials may legitimately + # live only in the pool (e.g. after ``hermes auth add xai-oauth``). Fall + # back to the default base URL in that case so the model picker still + # completes successfully instead of bailing out with + # ``Could not resolve xAI OAuth credentials``. 
+ base_url = DEFAULT_XAI_OAUTH_BASE_URL + try: + creds = resolve_xai_oauth_runtime_credentials() + base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url + except Exception: + pass + + models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or []) + selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-code-fast-1")) + if selected: + _save_model_choice(selected) + _update_config_for_provider("xai-oauth", base_url) + print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok Subscription)") + else: + print("No change.") + + _DEFAULT_QWEN_PORTAL_MODELS = [ "qwen3-coder-plus", "qwen3-coder", @@ -9400,7 +9483,7 @@ def _build_provider_choices() -> list[str]: except Exception: # Fallback: static list guarantees the CLI always works return [ - "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", + "auto", "openrouter", "nous", "openai-codex", "xai-oauth", "copilot-acp", "copilot", "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee", @@ -9931,7 +10014,7 @@ def main(): ) login_parser.add_argument( "--provider", - choices=["nous", "openai-codex"], + choices=["nous", "openai-codex", "xai-oauth"], default=None, help="Provider to authenticate with (default: nous)", ) @@ -9977,7 +10060,7 @@ def main(): ) logout_parser.add_argument( "--provider", - choices=["nous", "openai-codex", "spotify"], + choices=["nous", "openai-codex", "xai-oauth", "spotify"], default=None, help="Provider to log out from (default: active provider)", ) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index bc41132f5d5..ded3f448f87 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -116,13 +116,23 @@ def _codex_curated_models() -> list[str]: # (grok-4, grok-4-0709, 
grok-4-fast{,-reasoning,-non-reasoning}, # grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3). _XAI_STATIC_FALLBACK: list[str] = [ + "grok-4.3", "grok-4.20-0309-reasoning", "grok-4.20-0309-non-reasoning", "grok-4.20-multi-agent-0309", - "grok-4.3", ] +_XAI_TOP_MODEL = "grok-4.3" + + +def _xai_promote_top(ids: list[str]) -> list[str]: + """Pin the headline xAI model to the top of the curated list.""" + if _XAI_TOP_MODEL in ids: + return [_XAI_TOP_MODEL] + [m for m in ids if m != _XAI_TOP_MODEL] + return ids + + def _xai_curated_models() -> list[str]: """Derive the xAI-direct curated list from models.dev disk cache. @@ -142,7 +152,7 @@ def _xai_curated_models() -> list[str]: if isinstance(models, dict) and models: ids = [mid for mid in models.keys() if isinstance(mid, str)] if ids: - return sorted(ids) + return _xai_promote_top(sorted(ids)) except Exception: # Any failure (missing file, malformed JSON, import error) # falls through to the static list. @@ -190,6 +200,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gpt-4o-mini", ], "openai-codex": _codex_curated_models(), + "xai-oauth": _xai_curated_models(), "copilot-acp": [ "copilot-acp", ], @@ -918,6 +929,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"), + ProviderEntry("xai-oauth", "xAI Grok OAuth (SuperGrok Subscription)", "xAI Grok OAuth (SuperGrok Subscription)"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"), ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"), ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), @@ -1036,6 +1048,10 @@ _PROVIDER_ALIASES = { 
"amazon-bedrock": "bedrock", "amazon": "bedrock", "grok": "xai", + "grok-oauth": "xai-oauth", + "xai-oauth": "xai-oauth", + "x-ai-oauth": "xai-oauth", + "xai-grok-oauth": "xai-oauth", "x-ai": "xai", "x.ai": "xai", "nim": "nvidia", @@ -2166,6 +2182,8 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) except Exception: access_token = None return get_codex_model_ids(access_token=access_token) + if normalized == "xai-oauth": + return list(_PROVIDER_MODELS.get("xai-oauth", _PROVIDER_MODELS.get("xai", []))) if normalized in {"copilot", "copilot-acp"}: try: live = _fetch_github_models(_resolve_copilot_catalog_api_key()) @@ -3444,14 +3462,14 @@ def validate_requested_model( "message": message, } - # OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path. - if normalized == "openai-codex": + # Providers with non-standard catalog validation — /v1/models probing is not the right path. + if normalized in {"openai-codex", "xai-oauth"}: try: - codex_models = provider_model_ids("openai-codex") + catalog_models = provider_model_ids(normalized) except Exception: - codex_models = [] - if codex_models: - if requested_for_lookup in set(codex_models): + catalog_models = [] + if catalog_models: + if requested_for_lookup in set(catalog_models): return { "accepted": True, "persist": True, @@ -3459,7 +3477,7 @@ def validate_requested_model( "message": None, } # Auto-correct if the top match is very similar (e.g. 
typo) - auto = get_close_matches(requested_for_lookup, codex_models, n=1, cutoff=0.9) + auto = get_close_matches(requested_for_lookup, catalog_models, n=1, cutoff=0.9) if auto: return { "accepted": True, @@ -3468,17 +3486,18 @@ def validate_requested_model( "corrected_model": auto[0], "message": f"Auto-corrected `{requested}` → `{auto[0]}`", } - suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5) + suggestions = get_close_matches(requested_for_lookup, catalog_models, n=3, cutoff=0.5) suggestion_text = "" if suggestions: suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok Subscription)" return { "accepted": True, "persist": True, "recognized": False, "message": ( - f"Note: `{requested}` was not found in the OpenAI Codex model listing. " - "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID." + f"Note: `{requested}` was not found in the {provider_label} model listing. " + "It may still work if your account has access to a newer or hidden model ID." 
f"{suggestion_text}" ), } diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 08fc173dc69..9243b3f6f84 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -60,6 +60,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { auth_type="oauth_external", base_url_override="https://chatgpt.com/backend-api/codex", ), + "xai-oauth": HermesOverlay( + transport="codex_responses", + auth_type="oauth_external", + base_url_override="https://api.x.ai/v1", + base_url_env_var="XAI_BASE_URL", + ), "qwen-oauth": HermesOverlay( transport="openai_chat", auth_type="oauth_external", @@ -244,6 +250,10 @@ ALIASES: Dict[str, str] = { "x-ai": "xai", "x.ai": "xai", "grok": "xai", + "grok-oauth": "xai-oauth", + "xai-oauth": "xai-oauth", + "x-ai-oauth": "xai-oauth", + "xai-grok-oauth": "xai-oauth", # nvidia "nim": "nvidia", diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index d7c30fe5648..c0baf14db92 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -15,12 +15,14 @@ from hermes_cli.auth import ( AuthError, DEFAULT_CODEX_BASE_URL, DEFAULT_QWEN_BASE_URL, + DEFAULT_XAI_OAUTH_BASE_URL, PROVIDER_REGISTRY, _agent_key_is_usable, format_auth_error, resolve_provider, resolve_nous_runtime_credentials, resolve_codex_runtime_credentials, + resolve_xai_oauth_runtime_credentials, resolve_qwen_runtime_credentials, resolve_gemini_oauth_runtime_credentials, resolve_api_key_provider_credentials, @@ -238,6 +240,9 @@ def _resolve_runtime_from_pool_entry( if provider == "openai-codex": api_mode = "codex_responses" base_url = base_url or DEFAULT_CODEX_BASE_URL + elif provider == "xai-oauth": + api_mode = "codex_responses" + base_url = base_url or DEFAULT_XAI_OAUTH_BASE_URL elif provider == "qwen-oauth": api_mode = "chat_completions" base_url = base_url or DEFAULT_QWEN_BASE_URL @@ -1132,6 +1137,24 @@ def resolve_runtime_provider( logger.info("Auto-detected Codex provider but credentials failed; " "falling through to next 
provider.") + if provider == "xai-oauth": + try: + creds = resolve_xai_oauth_runtime_credentials() + return { + "provider": "xai-oauth", + "api_mode": "codex_responses", + "base_url": (creds.get("base_url") or "").rstrip("/") or DEFAULT_XAI_OAUTH_BASE_URL, + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "hermes-auth-store"), + "last_refresh": creds.get("last_refresh"), + "requested_provider": requested_provider, + } + except AuthError: + if requested_provider != "auto": + raise + logger.info("Auto-detected xAI OAuth provider but credentials failed; " + "falling through to next provider.") + if provider == "qwen-oauth": try: creds = resolve_qwen_runtime_credentials() diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 5d635b2c464..50e198b9dc7 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1091,6 +1091,58 @@ def _install_kittentts_deps() -> bool: return False +def _xai_oauth_logged_in_for_setup() -> bool: + """True iff xAI Grok OAuth credentials are already stored locally. + + Lets TTS / STT setup skip the API-key prompt for users who logged in + through ``hermes model`` -> xAI Grok OAuth (SuperGrok Subscription). + """ + try: + from hermes_cli.auth import get_xai_oauth_auth_status + + return bool(get_xai_oauth_auth_status().get("logged_in")) + except Exception: + return False + + +def _run_xai_oauth_login_from_setup() -> bool: + """Run the xAI Grok OAuth loopback login from inside the setup wizard. + + Returns True on success, False on any failure (the caller falls back + to whatever the user picked next, e.g. Edge TTS). 
+ """ + try: + from hermes_cli.auth import ( + DEFAULT_XAI_OAUTH_BASE_URL, + _is_remote_session, + _save_xai_oauth_tokens, + _update_config_for_provider, + _xai_oauth_loopback_login, + ) + except Exception as exc: + print_warning(f"xAI Grok OAuth helpers unavailable: {exc}") + return False + + open_browser = not _is_remote_session() + print() + print_info("Signing in to xAI Grok OAuth (SuperGrok Subscription)...") + try: + creds = _xai_oauth_loopback_login(open_browser=open_browser) + _save_xai_oauth_tokens( + creds["tokens"], + discovery=creds.get("discovery"), + redirect_uri=creds.get("redirect_uri", ""), + last_refresh=creds.get("last_refresh"), + ) + _update_config_for_provider( + "xai-oauth", creds.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL) + ) + return True + except Exception as exc: + print_warning(f"xAI Grok OAuth login failed: {exc}") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -1125,7 +1177,7 @@ def _setup_tts_provider(config: dict): "Edge TTS (free, cloud-based, no setup needed)", "ElevenLabs (premium quality, needs API key)", "OpenAI TTS (good quality, needs API key)", - "xAI TTS (Grok voices, needs API key)", + "xAI TTS (Grok voices — OAuth login or API key)", "MiniMax TTS (high quality with voice cloning, needs API key)", "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", @@ -1199,21 +1251,59 @@ def _setup_tts_provider(config: dict): selected = "edge" elif selected == "xai": - existing = get_env_value("XAI_API_KEY") - if not existing: + # Resolution order: existing OAuth tokens (free for SuperGrok subscribers + # via the Hermes auth store) > existing XAI_API_KEY > prompt the user. + # When neither is configured, offer both options instead of forcing the + # API-key path — xAI TTS works fine with OAuth bearer tokens too. 
+ oauth_logged_in = _xai_oauth_logged_in_for_setup() + existing_api_key = get_env_value("XAI_API_KEY") + + if oauth_logged_in: + print_success( + "xAI TTS will use your xAI Grok OAuth (SuperGrok Subscription) " + "credentials" + ) + elif existing_api_key: + print_success("xAI TTS will use your existing XAI_API_KEY") + else: print() - api_key = prompt("xAI API key for TTS", password=True) - if api_key: - save_env_value("XAI_API_KEY", api_key) - print_success("xAI TTS API key saved") + choice_idx = prompt_choice( + "How do you want xAI TTS to authenticate?", + choices=[ + "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login", + "Paste an xAI API key (console.x.ai)", + "Skip → fallback to Edge TTS", + ], + default=0, + ) + if choice_idx == 0: + if _run_xai_oauth_login_from_setup(): + print_success( + "Logged in — xAI TTS will use these OAuth credentials" + ) + else: + print_warning( + "xAI Grok OAuth login did not complete. " + "Falling back to Edge TTS." + ) + selected = "edge" + elif choice_idx == 1: + api_key = prompt("xAI API key for TTS", password=True) + if api_key: + save_env_value("XAI_API_KEY", api_key) + print_success("xAI TTS API key saved") + else: + from hermes_constants import display_hermes_home as _dhh + print_warning( + "No xAI API key provided for TTS. Configure XAI_API_KEY " + f"via hermes setup model or {_dhh()}/.env to use xAI TTS. " + "Falling back to Edge TTS." + ) + selected = "edge" else: - from hermes_constants import display_hermes_home as _dhh - print_warning( - "No xAI API key provided for TTS. Configure XAI_API_KEY via " - f"hermes setup model or {_dhh()}/.env to use xAI TTS. " - "Falling back to Edge TTS." - ) + print_warning("xAI TTS skipped. 
Falling back to Edge TTS.") selected = "edge" + if selected == "xai": print() voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)") diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index fc5b1acf5cf..891ffdeb05a 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -194,11 +194,10 @@ TOOL_CATEGORIES = { }, { "name": "xAI TTS", - "tag": "Grok voices - requires xAI API key", - "env_vars": [ - {"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"}, - ], + "tag": "Grok voices — uses xAI Grok OAuth or XAI_API_KEY", + "env_vars": [], "tts_provider": "xai", + "post_setup": "xai_grok", }, { "name": "ElevenLabs", @@ -925,6 +924,73 @@ def _run_post_setup(post_setup_key: str): _print_info(" Restart Hermes for tracing to take effect.") _print_info(" Verify: hermes plugins list") + elif post_setup_key == "xai_grok": + # Shared credential bootstrap for any picker entry that talks to xAI + # (TTS, Video Gen, future Image Gen, etc.). Accepts either a + # SuperGrok-tier OAuth bearer token (preferred — billed against the + # user's existing subscription) or a raw XAI_API_KEY from + # console.x.ai. The picker entries declare empty env_vars so we + # drive the full auth UX here. + try: + from hermes_cli.auth import get_xai_oauth_auth_status + oauth_logged_in = bool(get_xai_oauth_auth_status().get("logged_in")) + except Exception: + oauth_logged_in = False + existing_api_key = get_env_value("XAI_API_KEY") + + if oauth_logged_in: + _print_success( + " xAI will use your xAI Grok OAuth (SuperGrok Subscription) credentials" + ) + return + if existing_api_key: + _print_success(" xAI will use your existing XAI_API_KEY") + return + + _print_info(" xAI needs credentials. 
Choose one:") + try: + from hermes_cli.setup import ( + _run_xai_oauth_login_from_setup, + prompt_choice, + prompt as _setup_prompt, + ) + from hermes_cli.config import save_env_value + except Exception as exc: + _print_warning(f" Could not load setup helpers: {exc}") + _print_info(" Run later: hermes auth add xai-oauth (or set XAI_API_KEY)") + return + + idx = prompt_choice( + " How do you want xAI to authenticate?", + choices=[ + "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login", + "Paste an xAI API key (console.x.ai)", + "Skip — configure later via `hermes auth add xai-oauth`", + ], + default=0, + ) + if idx == 0: + if _run_xai_oauth_login_from_setup(): + _print_success( + " Logged in — xAI will use these OAuth credentials" + ) + else: + _print_warning( + " xAI Grok OAuth login did not complete. " + "Run later: hermes auth add xai-oauth" + ) + elif idx == 1: + api_key = _setup_prompt(" xAI API key", password=True) + if api_key: + save_env_value("XAI_API_KEY", api_key) + _print_success(" XAI_API_KEY saved") + else: + _print_warning( + " No API key provided. 
Run later: hermes auth add xai-oauth" + ) + else: + _print_info(" xAI will remain inactive until credentials are configured.") + # ─── Platform / Toolset Helpers ─────────────────────────────────────────────── diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py index ea8721075d0..d5aac4eccdd 100644 --- a/plugins/image_gen/xai/__init__.py +++ b/plugins/image_gen/xai/__init__.py @@ -31,7 +31,7 @@ from agent.image_gen_provider import ( save_b64_image, success_response, ) -from tools.xai_http import hermes_xai_user_agent +from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials logger = logging.getLogger(__name__) @@ -39,14 +39,17 @@ logger = logging.getLogger(__name__) # Model catalog # --------------------------------------------------------------------------- -API_MODEL = "grok-imagine-image" - _MODELS: Dict[str, Dict[str, Any]] = { "grok-imagine-image": { "display": "Grok Imagine Image", "speed": "~5-10s", "strengths": "Fast, high-quality", }, + "grok-imagine-image-quality": { + "display": "Grok Imagine Image (Quality)", + "speed": "~10-20s", + "strengths": "Higher fidelity / detail; slower than the standard model.", + }, } DEFAULT_MODEL = "grok-imagine-image" @@ -127,7 +130,8 @@ class XAIImageGenProvider(ImageGenProvider): return "xAI (Grok)" def is_available(self) -> bool: - return bool(os.getenv("XAI_API_KEY")) + creds = resolve_xai_http_credentials() + return bool(creds.get("api_key")) def list_models(self) -> List[Dict[str, Any]]: return [ @@ -141,17 +145,16 @@ class XAIImageGenProvider(ImageGenProvider): ] def get_setup_schema(self) -> Dict[str, Any]: + # Auth resolution is delegated to the shared ``xai_grok`` post_setup + # hook (``hermes_cli/tools_config.py``); identical to the TTS / video + # gen entries so users see the same OAuth-or-API-key choice for every + # xAI service. 
return { - "name": "xAI (Grok)", + "name": "xAI Grok Imagine (image)", "badge": "paid", - "tag": "Native xAI image generation via grok-imagine-image", - "env_vars": [ - { - "key": "XAI_API_KEY", - "prompt": "xAI API key", - "url": "https://console.x.ai/", - }, - ], + "tag": "grok-imagine-image — text-to-image; uses xAI Grok OAuth or XAI_API_KEY", + "env_vars": [], + "post_setup": "xai_grok", } def generate( @@ -161,12 +164,14 @@ class XAIImageGenProvider(ImageGenProvider): **kwargs: Any, ) -> Dict[str, Any]: """Generate an image using xAI's grok-imagine-image.""" - api_key = os.getenv("XAI_API_KEY", "").strip() + creds = resolve_xai_http_credentials() + api_key = str(creds.get("api_key") or "").strip() + provider_name = str(creds.get("provider") or "xai").strip() or "xai" if not api_key: return error_response( - error="XAI_API_KEY not set. Get one at https://console.x.ai/", + error="No xAI credentials found. Configure xAI OAuth in `hermes model` or set XAI_API_KEY.", error_type="missing_api_key", - provider="xai", + provider=provider_name, aspect_ratio=aspect_ratio, ) @@ -177,7 +182,7 @@ class XAIImageGenProvider(ImageGenProvider): xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION payload: Dict[str, Any] = { - "model": API_MODEL, + "model": model_id, "prompt": prompt, "aspect_ratio": xai_ar, "resolution": xai_res, @@ -189,7 +194,7 @@ class XAIImageGenProvider(ImageGenProvider): "User-Agent": hermes_xai_user_agent(), } - base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") + base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/") try: response = requests.post( @@ -210,7 +215,7 @@ class XAIImageGenProvider(ImageGenProvider): return error_response( error=f"xAI image generation failed ({status}): {err_msg}", error_type="api_error", - provider="xai", + provider=provider_name, model=model_id, prompt=prompt, aspect_ratio=aspect, @@ -219,7 +224,7 @@ class 
XAIImageGenProvider(ImageGenProvider): return error_response( error="xAI image generation timed out (120s)", error_type="timeout", - provider="xai", + provider=provider_name, model=model_id, prompt=prompt, aspect_ratio=aspect, @@ -228,7 +233,7 @@ class XAIImageGenProvider(ImageGenProvider): return error_response( error=f"xAI connection error: {exc}", error_type="connection_error", - provider="xai", + provider=provider_name, model=model_id, prompt=prompt, aspect_ratio=aspect, @@ -240,7 +245,7 @@ class XAIImageGenProvider(ImageGenProvider): return error_response( error=f"xAI returned invalid JSON: {exc}", error_type="invalid_response", - provider="xai", + provider=provider_name, model=model_id, prompt=prompt, aspect_ratio=aspect, @@ -252,7 +257,7 @@ class XAIImageGenProvider(ImageGenProvider): return error_response( error="xAI returned no image data", error_type="empty_response", - provider="xai", + provider=provider_name, model=model_id, prompt=prompt, aspect_ratio=aspect, diff --git a/plugins/video_gen/xai/__init__.py b/plugins/video_gen/xai/__init__.py index b7421799044..d6fe9d04a7b 100644 --- a/plugins/video_gen/xai/__init__.py +++ b/plugins/video_gen/xai/__init__.py @@ -10,8 +10,12 @@ Originally salvaged from PR #10600 by @Jaaneek; reshaped into the :class:`VideoGenProvider` plugin interface and trimmed to the generate-only surface. -Authentication via ``XAI_API_KEY``. Output is an HTTPS URL from xAI's -CDN; the gateway downloads and delivers it. +Authentication: xAI Grok OAuth tokens (preferred — billed against the +user's SuperGrok subscription) or ``XAI_API_KEY``. Both routes are +resolved through ``tools.xai_http.resolve_xai_http_credentials`` so a +single login covers chat + TTS + image gen + video gen + transcription. +Output is an HTTPS URL from xAI's CDN; the gateway downloads and +delivers it. 
""" from __future__ import annotations @@ -20,7 +24,7 @@ import asyncio import logging import os import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple import httpx @@ -66,24 +70,44 @@ _MODELS: Dict[str, Dict[str, Any]] = { # --------------------------------------------------------------------------- -def _xai_base_url() -> str: - return (os.getenv("XAI_BASE_URL") or DEFAULT_XAI_BASE_URL).strip().rstrip("/") +def _resolve_xai_credentials() -> Tuple[str, str]: + """Return ``(api_key, base_url)`` from the shared xAI credential resolver. + + Order: runtime provider (xai-oauth pool entry) → singleton ``auth.json`` + OAuth tokens → ``XAI_API_KEY`` env var. ``api_key`` is empty when no + credential source is available; callers must check before using it. + """ + try: + from tools.xai_http import resolve_xai_http_credentials + + creds = resolve_xai_http_credentials() or {} + except Exception as exc: + logger.debug("xAI credential resolver failed: %s", exc) + creds = {} + + api_key = str(creds.get("api_key") or os.getenv("XAI_API_KEY", "")).strip() + base_url = str( + creds.get("base_url") + or os.getenv("XAI_BASE_URL") + or DEFAULT_XAI_BASE_URL + ).strip().rstrip("/") + return api_key, base_url -def _xai_headers() -> Dict[str, str]: - api_key = os.getenv("XAI_API_KEY", "").strip() - if not api_key: - raise ValueError("XAI_API_KEY not set. 
Get one at https://console.x.ai/") +def _xai_user_agent() -> str: try: from tools.xai_http import hermes_xai_user_agent - ua = hermes_xai_user_agent() + return hermes_xai_user_agent() except Exception: - ua = "hermes-agent/video_gen" + return "hermes-agent/video_gen" + + +def _xai_headers(api_key: str) -> Dict[str, str]: return { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", - "User-Agent": ua, + "User-Agent": _xai_user_agent(), } @@ -110,12 +134,15 @@ def _clamp_duration(duration: Optional[int], has_reference_images: bool) -> int: async def _submit( client: httpx.AsyncClient, payload: Dict[str, Any], + *, + api_key: str, + base_url: str, ) -> str: """POST to /videos/generations — xAI's only public endpoint for our text-to-video and image-to-video surface.""" response = await client.post( - f"{_xai_base_url()}/videos/generations", - headers={**_xai_headers(), "x-idempotency-key": str(uuid.uuid4())}, + f"{base_url}/videos/generations", + headers={**_xai_headers(api_key), "x-idempotency-key": str(uuid.uuid4())}, json=payload, timeout=60, ) @@ -131,6 +158,8 @@ async def _poll( client: httpx.AsyncClient, request_id: str, *, + api_key: str, + base_url: str, timeout_seconds: int, poll_interval: int, ) -> Dict[str, Any]: @@ -138,8 +167,8 @@ async def _poll( last_status = "queued" while elapsed < timeout_seconds: response = await client.get( - f"{_xai_base_url()}/videos/{request_id}", - headers=_xai_headers(), + f"{base_url}/videos/{request_id}", + headers=_xai_headers(api_key), timeout=30, ) response.raise_for_status() @@ -174,7 +203,8 @@ class XAIVideoGenProvider(VideoGenProvider): return "xAI" def is_available(self) -> bool: - return bool(os.environ.get("XAI_API_KEY", "").strip()) + api_key, _ = _resolve_xai_credentials() + return bool(api_key) def list_models(self) -> List[Dict[str, Any]]: return [{"id": mid, **meta} for mid, meta in _MODELS.items()] @@ -183,17 +213,18 @@ class XAIVideoGenProvider(VideoGenProvider): return DEFAULT_MODEL def 
get_setup_schema(self) -> Dict[str, Any]: + # Auth resolution lives entirely in the shared ``xai_grok`` post_setup + # hook (``hermes_cli/tools_config.py``) so the picker doesn't blindly + # prompt for an API key when the user is already signed in via xAI + # Grok OAuth (SuperGrok Subscription) — TTS / image gen / video gen + # all share the same credential resolver. The hook offers an + # OAuth-vs-API-key choice when neither is configured. return { - "name": "xAI", + "name": "xAI Grok Imagine", "badge": "paid", - "tag": "grok-imagine-video — text-to-video & image-to-video with reference images", - "env_vars": [ - { - "key": "XAI_API_KEY", - "prompt": "xAI API key", - "url": "https://console.x.ai/", - }, - ], + "tag": "grok-imagine-video — text-to-video & image-to-video; uses xAI Grok OAuth or XAI_API_KEY", + "env_vars": [], + "post_setup": "xai_grok", } def capabilities(self) -> Dict[str, Any]: @@ -259,9 +290,14 @@ class XAIVideoGenProvider(VideoGenProvider): aspect_ratio: str, resolution: str, ) -> Dict[str, Any]: - if not os.environ.get("XAI_API_KEY", "").strip(): + api_key, base_url = _resolve_xai_credentials() + if not api_key: return error_response( - error="XAI_API_KEY not set. Get one at https://console.x.ai/", + error=( + "No xAI credentials found. Sign in via `hermes auth add xai-oauth` " + "(SuperGrok subscription) or set XAI_API_KEY from " + "https://console.x.ai/." 
+ ), error_type="auth_required", provider="xai", prompt=prompt, ) @@ -317,7 +353,9 @@ class XAIVideoGenProvider(VideoGenProvider): async with httpx.AsyncClient() as client: try: - request_id = await _submit(client, payload) + request_id = await _submit( + client, payload, api_key=api_key, base_url=base_url + ) except httpx.HTTPStatusError as exc: detail = "" try: @@ -334,6 +372,7 @@ class XAIVideoGenProvider(VideoGenProvider): poll_result = await _poll( client, request_id, + api_key=api_key, base_url=base_url, timeout_seconds=DEFAULT_TIMEOUT_SECONDS, poll_interval=DEFAULT_POLL_INTERVAL_SECONDS, ) diff --git a/run_agent.py b/run_agent.py index a4df8749777..a82c6417ae1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1275,7 +1275,7 @@ class AIAgent: self.api_mode = api_mode elif self.provider == "openai-codex": self.api_mode = "codex_responses" - elif self.provider == "xai": + elif self.provider in {"xai", "xai-oauth"}: self.api_mode = "codex_responses" elif (provider_name is None) and ( self._base_url_hostname == "chatgpt.com" @@ -7139,15 +7139,60 @@ class AIAgent: raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: - if self.api_mode != "codex_responses" or self.provider != "openai-codex": + if self.api_mode != "codex_responses" or self.provider not in {"openai-codex", "xai-oauth"}: + return False + + # Guard against silent account swap. + # + # When an agent is using a non-singleton credential — e.g. a manual + # pool entry (``hermes auth add xai-oauth``) whose tokens belong to + # a different account than the loopback_pkce singleton, or an agent + # constructed with an explicit ``api_key=`` arg — force-refreshing + # the singleton here and adopting its tokens silently re-routes the + # rest of the conversation onto the singleton's account. 
The + # credential pool's reactive recovery (``_recover_with_credential_pool``) + # is the right channel for that case; this path is the + # singleton-only fallback used when the pool can't recover, and + # MUST only fire when the agent really is on singleton tokens. + try: + if self.provider == "openai-codex": + from hermes_cli.auth import resolve_codex_runtime_credentials + + singleton_now = resolve_codex_runtime_credentials( + refresh_if_expiring=False, + ) + else: + from hermes_cli.auth import resolve_xai_oauth_runtime_credentials + + singleton_now = resolve_xai_oauth_runtime_credentials( + refresh_if_expiring=False, + ) + except Exception as exc: + logger.debug("%s singleton read failed: %s", self.provider, exc) + return False + + singleton_key = str(singleton_now.get("api_key") or "").strip() + active_key = str(self.api_key or "").strip() + if singleton_key and active_key and singleton_key != active_key: + logger.debug( + "%s singleton tokens differ from the active api_key; " + "skipping singleton force-refresh to avoid silent account swap. 
" + "Reactive credential rotation should go through the pool.", + self.provider, + ) return False try: - from hermes_cli.auth import resolve_codex_runtime_credentials + if self.provider == "openai-codex": + from hermes_cli.auth import resolve_codex_runtime_credentials - creds = resolve_codex_runtime_credentials(force_refresh=force) + creds = resolve_codex_runtime_credentials(force_refresh=force) + else: + from hermes_cli.auth import resolve_xai_oauth_runtime_credentials + + creds = resolve_xai_oauth_runtime_credentials(force_refresh=force) except Exception as exc: - logger.debug("Codex credential refresh failed: %s", exc) + logger.debug("%s credential refresh failed: %s", self.provider, exc) return False api_key = creds.get("api_key") @@ -7162,7 +7207,7 @@ class AIAgent: self._client_kwargs["api_key"] = self.api_key self._client_kwargs["base_url"] = self.base_url - if not self._replace_primary_openai_client(reason="codex_credential_refresh"): + if not self._replace_primary_openai_client(reason=f"{self.provider}_credential_refresh"): return False return True @@ -9631,7 +9676,7 @@ class AIAgent: and "/backend-api/codex" in self._base_url_lower ) ) - is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" + is_xai_responses = self.provider in {"xai", "xai-oauth"} or self._base_url_hostname == "api.x.ai" _msgs_for_codex = self._prepare_messages_for_non_vision_model(api_messages) return _ct.build_kwargs( model=self.model, @@ -13700,13 +13745,14 @@ class AIAgent: if ( self.api_mode == "codex_responses" - and self.provider == "openai-codex" + and self.provider in {"openai-codex", "xai-oauth"} and status_code == 401 and not codex_auth_retry_attempted ): codex_auth_retry_attempted = True if self._try_refresh_codex_client_credentials(force=True): - self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. 
Retrying request...") + _label = "xAI OAuth" if self.provider == "xai-oauth" else "Codex" + self._vprint(f"{self.log_prefix}🔐 {_label} auth refreshed after 401. Retrying request...") continue if ( self.api_mode == "chat_completions" @@ -14346,11 +14392,15 @@ class AIAgent: self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True) # Actionable guidance for common auth errors if classified.is_auth or classified.reason == FailoverReason.billing: - if _provider == "openai-codex" and status_code == 401: - self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True) - self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) - self._vprint(f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", force=True) - self._vprint(f"{self.log_prefix} 2. Then run `hermes auth` to re-authenticate.", force=True) + if _provider in {"openai-codex", "xai-oauth"} and status_code == 401: + if _provider == "openai-codex": + self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True) + self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) + self._vprint(f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", force=True) + self._vprint(f"{self.log_prefix} 2. Then run `hermes auth` to re-authenticate.", force=True) + else: + self._vprint(f"{self.log_prefix} 💡 xAI OAuth token was rejected (HTTP 401). To fix:", force=True) + self._vprint(f"{self.log_prefix} re-authenticate with xAI Grok OAuth (SuperGrok Subscription) from `hermes model`.", force=True) else: self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) self._vprint(f"{self.log_prefix} • Is the key valid? 
Run: hermes setup", force=True) diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index 6a4cda173ad..7100e8ac17d 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -100,6 +100,49 @@ class TestCodexBuildKwargs: ) assert "prompt_cache_key" not in kw + def test_xai_responses_sends_cache_key_via_extra_body(self, transport): + """xAI's Responses API documents ``prompt_cache_key`` as the + body-level cache-routing key (the ``x-grok-conv-id`` header is + Chat-Completions-only). Passing it via ``extra_body`` is robust + against openai SDK builds whose ``Responses.stream()`` kwarg + signature ever drops the field — the body field still serializes + and reaches xAI either way. The ``x-grok-conv-id`` header is kept + as a belt-and-braces fallback so cache routing survives even + when the body field would be stripped by an intermediate proxy. + Ref: https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits + """ + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-4.3", messages=messages, tools=[], + session_id="conv-xai-1", + is_xai_responses=True, + ) + # Top-level prompt_cache_key must NOT be set for xAI — the SDK + # signature drop is what motivated the extra_body indirection in + # the first place. The cache-routing field must travel in the + # body via extra_body. + assert "prompt_cache_key" not in kw + assert kw.get("extra_body", {}).get("prompt_cache_key") == "conv-xai-1" + # Header kept as belt-and-braces. + assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-xai-1" + + def test_xai_responses_extra_body_preserves_caller_fields(self, transport): + """When the caller already supplies ``extra_body`` (e.g. via + request_overrides), the xAI cache-key injection must merge into + the existing dict instead of overwriting it. 
Caller-supplied + ``prompt_cache_key`` wins (setdefault semantics) so user overrides + aren't silently clobbered by the transport.""" + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-4.3", messages=messages, tools=[], + session_id="conv-xai-1", + is_xai_responses=True, + request_overrides={"extra_body": {"prompt_cache_key": "caller-override", "other_field": 42}}, + ) + eb = kw.get("extra_body", {}) + assert eb.get("prompt_cache_key") == "caller-override" + assert eb.get("other_field") == 42 + def test_max_tokens(self, transport): messages = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( diff --git a/tests/hermes_cli/test_auth_xai_oauth_provider.py b/tests/hermes_cli/test_auth_xai_oauth_provider.py new file mode 100644 index 00000000000..9f1cc55f57e --- /dev/null +++ b/tests/hermes_cli/test_auth_xai_oauth_provider.py @@ -0,0 +1,1605 @@ +"""Tests for xAI Grok OAuth — tokens stored in Hermes auth store (~/.hermes/auth.json).""" + +import base64 +import json +import time +from pathlib import Path + +import pytest + +from hermes_cli.auth import ( + AuthError, + DEFAULT_XAI_OAUTH_BASE_URL, + PROVIDER_REGISTRY, + XAI_OAUTH_CLIENT_ID, + XAI_OAUTH_REDIRECT_HOST, + XAI_OAUTH_REDIRECT_PATH, + XAI_OAUTH_SCOPE, + _read_xai_oauth_tokens, + _save_xai_oauth_tokens, + _xai_access_token_is_expiring, + _xai_callback_cors_origin, + _xai_oauth_build_authorize_url, + _xai_validate_loopback_redirect_uri, + get_xai_oauth_auth_status, + refresh_xai_oauth_pure, + resolve_provider, + resolve_xai_oauth_runtime_credentials, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _setup_hermes_auth( + hermes_home: Path, + *, + access_token: str = "access", + refresh_token: str = "refresh", + discovery: dict | None = None, +): + """Write xAI OAuth tokens into the Hermes auth store at the given root.""" + 
hermes_home.mkdir(parents=True, exist_ok=True) + state = { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + }, + "last_refresh": "2026-05-14T00:00:00Z", + "auth_mode": "oauth_pkce", + } + if discovery is not None: + state["discovery"] = discovery + auth_store = { + "version": 1, + "active_provider": "xai-oauth", + "providers": {"xai-oauth": state}, + } + auth_file = hermes_home / "auth.json" + auth_file.write_text(json.dumps(auth_store, indent=2)) + return auth_file + + +def _jwt_with_exp(exp_epoch: int) -> str: + """Build a minimal JWT-shaped string with the given exp claim.""" + payload = {"exp": exp_epoch} + encoded = ( + base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")) + .rstrip(b"=") + .decode("utf-8") + ) + return f"h.{encoded}.s" + + +class _StubHTTPResponse: + def __init__(self, status_code: int, payload): + self.status_code = status_code + self._payload = payload + self.text = json.dumps(payload) if isinstance(payload, (dict, list)) else str(payload) + + def json(self): + if isinstance(self._payload, Exception): + raise self._payload + return self._payload + + +class _StubHTTPClient: + def __init__(self, response): + self._response = response + self.last_call = None + + def __enter__(self): + return self + + def __exit__(self, *args): + return False + + def post(self, *args, **kwargs): + self.last_call = ("post", args, kwargs) + return self._response + + +def _patch_httpx_client(monkeypatch, response): + holder = {"client": None} + + def _factory(*args, **kwargs): + client = _StubHTTPClient(response) + holder["client"] = client + return client + + monkeypatch.setattr("hermes_cli.auth.httpx.Client", _factory) + return holder + + +# --------------------------------------------------------------------------- +# Constants and registry +# --------------------------------------------------------------------------- + + +def 
test_xai_oauth_provider_registered(): + assert "xai-oauth" in PROVIDER_REGISTRY + pconfig = PROVIDER_REGISTRY["xai-oauth"] + assert pconfig.id == "xai-oauth" + assert pconfig.auth_type == "oauth_external" + assert pconfig.inference_base_url == DEFAULT_XAI_OAUTH_BASE_URL + + +def test_resolve_provider_normalizes_xai_oauth_aliases(): + assert resolve_provider("xai-oauth") == "xai-oauth" + assert resolve_provider("grok-oauth") == "xai-oauth" + assert resolve_provider("x-ai-oauth") == "xai-oauth" + assert resolve_provider("xai-grok-oauth") == "xai-oauth" + + +# --------------------------------------------------------------------------- +# JWT expiry detection +# --------------------------------------------------------------------------- + + +def test_xai_access_token_is_expiring_returns_true_for_expired_jwt(): + expired = _jwt_with_exp(int(time.time()) - 60) + assert _xai_access_token_is_expiring(expired, 0) is True + + +def test_xai_access_token_is_expiring_returns_false_for_fresh_jwt(): + fresh = _jwt_with_exp(int(time.time()) + 3600) + assert _xai_access_token_is_expiring(fresh, 0) is False + + +def test_xai_access_token_is_expiring_honors_skew_window(): + near = _jwt_with_exp(int(time.time()) + 30) + assert _xai_access_token_is_expiring(near, 60) is True + assert _xai_access_token_is_expiring(near, 0) is False + + +def test_xai_access_token_is_expiring_returns_false_for_non_jwt(): + assert _xai_access_token_is_expiring("not.a.jwt.but.has.dots", 0) is False + assert _xai_access_token_is_expiring("opaque-token-no-dots", 0) is False + assert _xai_access_token_is_expiring("", 0) is False + assert _xai_access_token_is_expiring(None, 0) is False # type: ignore[arg-type] + + +def test_xai_access_token_is_expiring_returns_false_for_jwt_without_exp(): + payload = {"sub": "user"} + encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode() + token = f"h.{encoded}.s" + assert _xai_access_token_is_expiring(token, 0) is False + + +# 
--------------------------------------------------------------------------- +# Loopback redirect URI validation +# --------------------------------------------------------------------------- + + +def test_xai_validate_loopback_redirect_uri_accepts_localhost_with_port(): + host, port, path = _xai_validate_loopback_redirect_uri( + "http://127.0.0.1:56121/callback" + ) + assert host == XAI_OAUTH_REDIRECT_HOST + assert port == 56121 + assert path == XAI_OAUTH_REDIRECT_PATH + + +def test_xai_validate_loopback_redirect_uri_rejects_https(): + with pytest.raises(AuthError) as exc: + _xai_validate_loopback_redirect_uri("https://127.0.0.1:56121/callback") + assert exc.value.code == "xai_redirect_invalid" + + +def test_xai_validate_loopback_redirect_uri_rejects_non_loopback(): + with pytest.raises(AuthError) as exc: + _xai_validate_loopback_redirect_uri("http://example.com:56121/callback") + assert exc.value.code == "xai_redirect_invalid" + + +def test_xai_validate_loopback_redirect_uri_rejects_missing_port(): + with pytest.raises(AuthError) as exc: + _xai_validate_loopback_redirect_uri("http://127.0.0.1/callback") + assert exc.value.code == "xai_redirect_invalid" + + +# --------------------------------------------------------------------------- +# Authorize URL construction +# --------------------------------------------------------------------------- + + +def _parse_authorize_url(url: str) -> dict: + from urllib.parse import urlparse, parse_qs + + parsed = urlparse(url) + return {k: v[0] for k, v in parse_qs(parsed.query).items()} + + +def test_xai_oauth_authorize_url_includes_plan_generic(): + """Regression: accounts.x.ai requires `plan=generic` for loopback OAuth on + non-allowlisted clients. 
Must always be present on the authorize URL.""" + url = _xai_oauth_build_authorize_url( + authorization_endpoint="https://auth.x.ai/oauth2/authorize", + redirect_uri="http://127.0.0.1:56121/callback", + code_challenge="challenge-xyz", + state="state-abc", + nonce="nonce-def", + ) + params = _parse_authorize_url(url) + assert params["plan"] == "generic" + + +def test_xai_oauth_authorize_url_includes_referrer_hermes_agent(): + """Attribution: xAI's OAuth server can identify Hermes-originated logins + via the referrer query param. Must always be present on the authorize URL.""" + url = _xai_oauth_build_authorize_url( + authorization_endpoint="https://auth.x.ai/oauth2/authorize", + redirect_uri="http://127.0.0.1:56121/callback", + code_challenge="challenge-xyz", + state="state-abc", + nonce="nonce-def", + ) + params = _parse_authorize_url(url) + assert params["referrer"] == "hermes-agent" + + +def test_xai_oauth_authorize_url_includes_pkce_and_oidc_params(): + url = _xai_oauth_build_authorize_url( + authorization_endpoint="https://auth.x.ai/oauth2/authorize", + redirect_uri="http://127.0.0.1:56121/callback", + code_challenge="challenge-xyz", + state="state-abc", + nonce="nonce-def", + ) + params = _parse_authorize_url(url) + assert params["response_type"] == "code" + assert params["client_id"] == XAI_OAUTH_CLIENT_ID + assert params["redirect_uri"] == "http://127.0.0.1:56121/callback" + assert params["scope"] == XAI_OAUTH_SCOPE + assert params["code_challenge"] == "challenge-xyz" + assert params["code_challenge_method"] == "S256" + assert params["state"] == "state-abc" + assert params["nonce"] == "nonce-def" + + +# --------------------------------------------------------------------------- +# CORS allowlist +# --------------------------------------------------------------------------- + + +def test_xai_callback_cors_origin_allowlist(): + assert _xai_callback_cors_origin("https://accounts.x.ai") == "https://accounts.x.ai" + assert 
_xai_callback_cors_origin("https://auth.x.ai") == "https://auth.x.ai" + + +def test_xai_callback_cors_origin_rejects_unknown_origin(): + assert _xai_callback_cors_origin("https://attacker.example.com") == "" + assert _xai_callback_cors_origin(None) == "" + assert _xai_callback_cors_origin("") == "" + + +# --------------------------------------------------------------------------- +# Token roundtrip + reads +# --------------------------------------------------------------------------- + + +def test_save_and_read_xai_oauth_tokens_roundtrip(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _save_xai_oauth_tokens( + { + "access_token": "at-1", + "refresh_token": "rt-1", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + }, + discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"}, + redirect_uri="http://127.0.0.1:56121/callback", + ) + data = _read_xai_oauth_tokens() + assert data["tokens"]["access_token"] == "at-1" + assert data["tokens"]["refresh_token"] == "rt-1" + assert data["redirect_uri"] == "http://127.0.0.1:56121/callback" + assert data["discovery"]["token_endpoint"] == "https://auth.x.ai/oauth2/token" + + +def test_read_xai_oauth_tokens_missing(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + with pytest.raises(AuthError) as exc: + _read_xai_oauth_tokens() + assert exc.value.code == "xai_auth_missing" + assert exc.value.relogin_required is True + + +def test_read_xai_oauth_tokens_missing_access_token(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + _setup_hermes_auth(hermes_home, access_token="") + 
monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + with pytest.raises(AuthError) as exc: + _read_xai_oauth_tokens() + assert exc.value.code == "xai_auth_missing_access_token" + assert exc.value.relogin_required is True + + +def test_read_xai_oauth_tokens_missing_refresh_token(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + _setup_hermes_auth(hermes_home, refresh_token="") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + with pytest.raises(AuthError) as exc: + _read_xai_oauth_tokens() + assert exc.value.code == "xai_auth_missing_refresh_token" + assert exc.value.relogin_required is True + + +# --------------------------------------------------------------------------- +# Runtime credential resolution +# --------------------------------------------------------------------------- + + +def test_resolve_xai_runtime_credentials_returns_singleton_state(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False) + monkeypatch.delenv("XAI_BASE_URL", raising=False) + + creds = resolve_xai_oauth_runtime_credentials() + assert creds["provider"] == "xai-oauth" + assert creds["api_key"] == fresh + assert creds["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL + assert creds["source"] == "hermes-auth-store" + assert creds["auth_mode"] == "oauth_pkce" + + +def test_resolve_xai_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + expiring = _jwt_with_exp(int(time.time()) - 10) + _setup_hermes_auth( + hermes_home, + access_token=expiring, + refresh_token="rt-old", + discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"}, + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + new_access = _jwt_with_exp(int(time.time()) + 3600) + called = {"count": 0} + + def _fake_refresh(tokens, 
**kwargs): + called["count"] += 1 + updated = dict(tokens) + updated["access_token"] = new_access + updated["refresh_token"] = "rt-new" + return updated + + monkeypatch.setattr("hermes_cli.auth._refresh_xai_oauth_tokens", _fake_refresh) + + creds = resolve_xai_oauth_runtime_credentials() + assert called["count"] == 1 + assert creds["api_key"] == new_access + + +def test_resolve_xai_runtime_credentials_force_refresh(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth( + hermes_home, + access_token=fresh, + discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"}, + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + forced = _jwt_with_exp(int(time.time()) + 7200) + called = {"count": 0} + + def _fake_refresh(tokens, **kwargs): + called["count"] += 1 + updated = dict(tokens) + updated["access_token"] = forced + return updated + + monkeypatch.setattr("hermes_cli.auth._refresh_xai_oauth_tokens", _fake_refresh) + + creds = resolve_xai_oauth_runtime_credentials(force_refresh=True, refresh_if_expiring=False) + assert called["count"] == 1 + assert creds["api_key"] == forced + + +def test_resolve_xai_runtime_credentials_honours_env_base_url(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("HERMES_XAI_BASE_URL", "https://custom.x.ai/v1/") + + creds = resolve_xai_oauth_runtime_credentials() + assert creds["base_url"] == "https://custom.x.ai/v1" + + +# --------------------------------------------------------------------------- +# Auth status surface +# --------------------------------------------------------------------------- + + +def test_get_xai_oauth_auth_status_logged_in_via_singleton(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + 
_setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + status = get_xai_oauth_auth_status() + assert status["logged_in"] is True + assert status["api_key"] == fresh + assert status["auth_mode"] == "oauth_pkce" + + +def test_get_xai_oauth_auth_status_logged_out(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + status = get_xai_oauth_auth_status() + assert status["logged_in"] is False + assert "error" in status + + +# --------------------------------------------------------------------------- +# refresh_xai_oauth_pure error handling +# --------------------------------------------------------------------------- + + +def test_refresh_xai_oauth_pure_requires_refresh_token(): + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure("at", "") + assert exc.value.code == "xai_auth_missing_refresh_token" + assert exc.value.relogin_required is True + + +def test_refresh_xai_oauth_pure_relogin_on_400(monkeypatch): + response = _StubHTTPResponse(400, {"error": "invalid_grant"}) + _patch_httpx_client(monkeypatch, response) + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert exc.value.code == "xai_refresh_failed" + assert exc.value.relogin_required is True + + +def test_refresh_xai_oauth_pure_no_relogin_on_500(monkeypatch): + response = _StubHTTPResponse(503, "service unavailable") + _patch_httpx_client(monkeypatch, response) + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert exc.value.code == "xai_refresh_failed" + assert exc.value.relogin_required is False + + +def test_refresh_xai_oauth_pure_returns_updated_tokens(monkeypatch): + new_access 
= _jwt_with_exp(int(time.time()) + 3600) + response = _StubHTTPResponse( + 200, + { + "access_token": new_access, + "refresh_token": "rt-rotated", + "id_token": "id-1", + "expires_in": 3600, + "token_type": "Bearer", + }, + ) + holder = _patch_httpx_client(monkeypatch, response) + + updated = refresh_xai_oauth_pure( + "at", "rt-old", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert updated["access_token"] == new_access + assert updated["refresh_token"] == "rt-rotated" + assert updated["id_token"] == "id-1" + assert updated["token_type"] == "Bearer" + assert updated["last_refresh"].endswith("Z") + client = holder["client"] + assert client is not None + _method, _args, kwargs = client.last_call + assert kwargs["data"]["grant_type"] == "refresh_token" + assert kwargs["data"]["refresh_token"] == "rt-old" + assert kwargs["data"]["client_id"] == XAI_OAUTH_CLIENT_ID + + +def test_refresh_xai_oauth_pure_keeps_refresh_token_when_response_omits_it(monkeypatch): + """Some OAuth providers don't rotate refresh tokens — preserve the old one.""" + new_access = _jwt_with_exp(int(time.time()) + 3600) + response = _StubHTTPResponse( + 200, + { + "access_token": new_access, + "expires_in": 3600, + "token_type": "Bearer", + }, + ) + _patch_httpx_client(monkeypatch, response) + + updated = refresh_xai_oauth_pure( + "at", "rt-stable", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert updated["access_token"] == new_access + assert updated["refresh_token"] == "rt-stable" + + +def test_refresh_xai_oauth_pure_rejects_response_without_access_token(monkeypatch): + response = _StubHTTPResponse( + 200, + {"refresh_token": "rt-new", "expires_in": 3600}, + ) + _patch_httpx_client(monkeypatch, response) + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert exc.value.code == "xai_refresh_missing_access_token" + assert exc.value.relogin_required is True + + +def 
test_refresh_xai_oauth_pure_raises_typed_error_on_malformed_json(monkeypatch): + """xAI returning HTTP 200 with a non-JSON body (captive portal, proxy + error page, etc.) must surface a typed AuthError, not a raw + ``json.JSONDecodeError`` traceback. Matches the qwen-oauth precedent + so the upstream UX layer (``format_auth_error``) can map the failure.""" + response = _StubHTTPResponse(200, ValueError("not json")) + response.text = "captive portal" + _patch_httpx_client(monkeypatch, response) + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert exc.value.code == "xai_refresh_invalid_json" + + +def test_xai_oauth_discovery_raises_typed_error_on_malformed_json(monkeypatch): + """Discovery is a cold-start, one-time fetch. If the response is HTTP + 200 with a non-JSON body (corporate proxy / captive portal returning + HTML), surface a typed AuthError rather than letting the + ``json.JSONDecodeError`` escape — so the message reads as an auth + problem instead of an internal parsing crash.""" + from hermes_cli.auth import _xai_oauth_discovery + + class _BadJSON: + status_code = 200 + + def json(self): + raise ValueError("Expecting value: line 1 column 1 (char 0)") + + monkeypatch.setattr( + "hermes_cli.auth.httpx.get", + lambda *a, **kw: _BadJSON(), + ) + with pytest.raises(AuthError) as exc: + _xai_oauth_discovery() + assert exc.value.code == "xai_discovery_invalid_json" + + +def test_xai_oauth_discovery_raises_typed_error_on_non_object_payload(monkeypatch): + """A discovery body that decodes as JSON but isn't an object (e.g. a + bare string or array) must not slip through and trigger an + ``AttributeError`` on ``payload.get(...)`` later. 
Reject loudly + with the same incomplete-response code the missing-endpoint path uses.""" + from hermes_cli.auth import _xai_oauth_discovery + + class _StubResponse: + status_code = 200 + + def json(self): + return ["not", "an", "object"] + + monkeypatch.setattr( + "hermes_cli.auth.httpx.get", + lambda *a, **kw: _StubResponse(), + ) + with pytest.raises(AuthError) as exc: + _xai_oauth_discovery() + assert exc.value.code == "xai_discovery_incomplete" + + +# --------------------------------------------------------------------------- +# OIDC discovery endpoint origin/scheme validation (MITM hardening) +# --------------------------------------------------------------------------- + + +def test_refresh_xai_oauth_pure_rejects_non_https_token_endpoint(monkeypatch): + """A poisoned auth.json (from MITM during initial discovery, or an older + Hermes that didn't validate) must not be silently honored on the refresh + hot path. A non-HTTPS ``token_endpoint`` would leak the refresh_token in + cleartext on every refresh; refuse before the POST.""" + # No HTTP stub installed — refresh must fail at validation, not at POST. + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="http://auth.x.ai/oauth2/token" + ) + assert exc.value.code == "xai_discovery_invalid" + + +def test_refresh_xai_oauth_pure_rejects_off_origin_token_endpoint(monkeypatch): + """Pin the cached token_endpoint host to the xAI origin. A one-time MITM + during discovery could persist a token_endpoint on attacker-controlled + infrastructure — every subsequent refresh would silently leak the + refresh_token to that attacker. 
Refuse off-origin endpoints loudly so + the user can re-run discovery.""" + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://evil.example.com/token" + ) + assert exc.value.code == "xai_discovery_invalid" + + +def test_refresh_xai_oauth_pure_rejects_lookalike_suffix(monkeypatch): + """Substring confusion: ``evilx.ai`` ends in ``x.ai`` but is NOT a + ``.x.ai`` subdomain. The validator must enforce the leading-dot suffix + so attacker-registered apex lookalikes can't slip through.""" + with pytest.raises(AuthError) as exc: + refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://evilx.ai/token" + ) + assert exc.value.code == "xai_discovery_invalid" + + +def test_refresh_xai_oauth_pure_accepts_apex_and_subdomain_endpoints(monkeypatch): + """The validator must accept BOTH the bare xAI apex (``x.ai``) and any + ``*.x.ai`` subdomain (e.g. ``auth.x.ai`` today, future migrations to + ``accounts.x.ai`` etc.). Without subdomain support we'd lock the + integration to whatever xAI happens to use today.""" + new_access = _jwt_with_exp(int(time.time()) + 3600) + response = _StubHTTPResponse( + 200, + {"access_token": new_access, "expires_in": 3600, "token_type": "Bearer"}, + ) + _patch_httpx_client(monkeypatch, response) + # auth.x.ai (current production) + updated = refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token" + ) + assert updated["access_token"] == new_access + # hypothetical migration to accounts.x.ai + _patch_httpx_client(monkeypatch, response) + updated2 = refresh_xai_oauth_pure( + "at", "rt", token_endpoint="https://accounts.x.ai/token" + ) + assert updated2["access_token"] == new_access + + +def test_xai_oauth_discovery_validates_endpoints(monkeypatch): + """The discovery response itself goes through endpoint validation, so a + one-time MITM during initial login cannot poison ``auth.json`` with an + attacker-controlled ``token_endpoint``.
(The persistence is what makes + this attack worth defending against — one MITM = forever credential + leak.)""" + from hermes_cli.auth import _xai_oauth_discovery + + class _StubGetResponse: + status_code = 200 + + def __init__(self, payload): + self._payload = payload + + def json(self): + return self._payload + + def _fake_get(url, headers=None, timeout=None): + return _StubGetResponse({ + "authorization_endpoint": "https://auth.x.ai/oauth2/authorize", + "token_endpoint": "https://evil.example.com/token", # poisoned + }) + + monkeypatch.setattr("hermes_cli.auth.httpx.get", _fake_get) + with pytest.raises(AuthError) as exc: + _xai_oauth_discovery() + assert exc.value.code == "xai_discovery_invalid" + + +def test_xai_oauth_discovery_validates_authorization_endpoint(monkeypatch): + """A poisoned ``authorization_endpoint`` is just as dangerous as a + poisoned ``token_endpoint``: it sends the user's browser (with their + logged-in xAI session cookies) to attacker infrastructure that can + phish the consent screen and exchange a stolen authorization code. + + Both endpoints must be validated independently. 
This test pins the + parity so nobody can later "optimise" by validating only the token + endpoint and silently lose authorization-endpoint defense.""" + from hermes_cli.auth import _xai_oauth_discovery + + class _StubGetResponse: + status_code = 200 + + def __init__(self, payload): + self._payload = payload + + def json(self): + return self._payload + + def _fake_get(url, headers=None, timeout=None): + return _StubGetResponse({ + "authorization_endpoint": "https://evil.example.com/authorize", # poisoned + "token_endpoint": "https://auth.x.ai/oauth2/token", + }) + + monkeypatch.setattr("hermes_cli.auth.httpx.get", _fake_get) + with pytest.raises(AuthError) as exc: + _xai_oauth_discovery() + assert exc.value.code == "xai_discovery_invalid" + + +# --------------------------------------------------------------------------- +# Pool seeding from singleton +# --------------------------------------------------------------------------- + + +def test_credential_pool_seeds_xai_oauth_from_singleton(tmp_path, monkeypatch): + """After `hermes model` -> xai-oauth, the singleton holds tokens. 
load_pool + must surface that as a pool entry so `hermes auth list` reflects truth and + refreshes route through the pool consistently with codex.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh, refresh_token="rt-1") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("xai-oauth") + assert pool.has_credentials() + entries = pool.entries() + assert len(entries) == 1 + entry = entries[0] + assert entry.access_token == fresh + assert entry.refresh_token == "rt-1" + assert entry.source == "loopback_pkce" + assert entry.base_url == DEFAULT_XAI_OAUTH_BASE_URL + + +def test_credential_pool_does_not_seed_when_singleton_missing_access_token(tmp_path, monkeypatch): + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + auth_store = { + "version": 1, + "providers": { + "xai-oauth": { + "tokens": {"access_token": "", "refresh_token": "rt"}, + "auth_mode": "oauth_pkce", + } + }, + } + (hermes_home / "auth.json").write_text(json.dumps(auth_store)) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("xai-oauth") + assert not pool.has_credentials() + + +def test_credential_pool_seed_respects_suppression(tmp_path, monkeypatch): + """`hermes auth remove xai-oauth N` for the seeded entry suppresses + further re-seeding so the removal is stable across load_pool calls.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Suppress the source — mimic `hermes auth remove`.
+ from hermes_cli.auth import suppress_credential_source + + suppress_credential_source("xai-oauth", "loopback_pkce") + + pool = load_pool("xai-oauth") + assert not pool.has_credentials() + + +def test_auth_remove_xai_oauth_clears_singleton_and_sticks(tmp_path, monkeypatch): + """End-to-end regression: ``hermes auth remove xai-oauth 1`` for a + singleton-seeded entry must clear auth.json providers.xai-oauth AND + suppress further re-seeding — otherwise the next ``load_pool`` call + silently resurrects the entry from the still-present singleton, making + the user-facing removal a no-op (the entry reappears on the next + invocation with no warning). + + The bug pre-fix: there was no RemovalStep registered for + (xai-oauth, loopback_pkce), so ``find_removal_step`` returned None + and ``auth_remove_command`` fell through to the "unregistered source — + nothing to clean up" branch. That branch is correct for ``manual`` + entries (pool-only) but wrong for singleton-seeded loopback_pkce + entries (auth.json singleton survives the in-memory removal).""" + from agent.credential_pool import load_pool + from hermes_cli.auth_commands import auth_remove_command + from types import SimpleNamespace + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh, refresh_token="rt-1") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Confirm pre-state: pool sees the seeded entry, auth.json has the singleton. + pool = load_pool("xai-oauth") + assert pool.has_credentials() + raw = json.loads((hermes_home / "auth.json").read_text()) + assert "xai-oauth" in raw.get("providers", {}) + + # Act: the user runs `hermes auth remove xai-oauth 1`. + auth_remove_command(SimpleNamespace(provider="xai-oauth", target="1")) + + # Post-state: auth.json singleton must be cleared so a re-seed has + # nothing to import. 
+ raw_after = json.loads((hermes_home / "auth.json").read_text()) + assert "xai-oauth" not in raw_after.get("providers", {}), ( + "auth.json providers.xai-oauth must be cleared — otherwise the " + "next load_pool() reseeds the removed entry from the surviving " + "singleton, silently undoing the user's removal." + ) + + # And the next load must not reseed the entry from anywhere. + pool_after = load_pool("xai-oauth") + assert not pool_after.has_credentials(), ( + "Removal must stick across load_pool() calls — without the " + "loopback_pkce RemovalStep, the seed function reads the singleton " + "and rebuilds the entry on every Hermes invocation." + ) + + +# --------------------------------------------------------------------------- +# Pool sync-back to singleton after refresh +# --------------------------------------------------------------------------- + + +def test_pool_sync_back_writes_to_singleton(tmp_path, monkeypatch): + """When the pool refreshes a singleton-seeded xAI entry, the new tokens + must be written back to providers["xai-oauth"] so that + resolve_xai_oauth_runtime_credentials() (which reads the singleton) + doesn't keep using the consumed refresh token.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + expired = _jwt_with_exp(int(time.time()) - 10) + _setup_hermes_auth(hermes_home, access_token=expired, refresh_token="rt-old") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + new_access = _jwt_with_exp(int(time.time()) + 3600) + + def _fake_refresh(access_token, refresh_token, **kwargs): + assert refresh_token == "rt-old" + return { + "access_token": new_access, + "refresh_token": "rt-new", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T01:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + selected = pool.select() + assert selected is not None + assert selected.access_token == 
new_access + assert selected.refresh_token == "rt-new" + + # Singleton must reflect refreshed tokens — otherwise the next process + # to load credentials would re-seed the consumed refresh token. + auth_path = hermes_home / "auth.json" + raw = json.loads(auth_path.read_text()) + state = raw["providers"]["xai-oauth"] + assert state["tokens"]["access_token"] == new_access + assert state["tokens"]["refresh_token"] == "rt-new" + assert state["last_refresh"] == "2026-05-15T01:00:00Z" + + +# --------------------------------------------------------------------------- +# Runtime provider routing +# --------------------------------------------------------------------------- + + +def test_runtime_provider_uses_pool_entry_for_xai_oauth(tmp_path, monkeypatch): + from hermes_cli.runtime_provider import resolve_runtime_provider + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False) + monkeypatch.delenv("XAI_BASE_URL", raising=False) + + runtime = resolve_runtime_provider(requested="xai-oauth") + assert runtime["provider"] == "xai-oauth" + assert runtime["api_mode"] == "codex_responses" + assert runtime["api_key"] == fresh + assert runtime["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL + + +def test_runtime_provider_default_base_url_when_pool_entry_missing_url(tmp_path, monkeypatch): + """Edge case: a pool entry that somehow has an empty base_url should still + surface the default xAI inference base URL instead of an empty string.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + 
monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False) + monkeypatch.delenv("XAI_BASE_URL", raising=False) + + fresh = _jwt_with_exp(int(time.time()) + 3600) + pool = load_pool("xai-oauth") + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=fresh, + refresh_token="rt", + base_url="", + ) + ) + + from hermes_cli.runtime_provider import resolve_runtime_provider + + runtime = resolve_runtime_provider(requested="xai-oauth") + assert runtime["provider"] == "xai-oauth" + assert runtime["api_mode"] == "codex_responses" + assert runtime["api_key"] == fresh + assert runtime["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL + + +# --------------------------------------------------------------------------- +# Token-expiry behavior on the pool path +# --------------------------------------------------------------------------- + + +def test_pool_entry_needs_refresh_when_jwt_within_skew(tmp_path, monkeypatch): + """The pool's proactive-refresh gate must trigger when the JWT exp claim + is within the XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS window — otherwise a + near-expired token will hit the API and 401 unnecessarily. Mirrors the + Codex skew-window behavior.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + from hermes_cli.auth import XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Token expires in 30s — well inside the 120s skew window. 
+ near_expiry = _jwt_with_exp(int(time.time()) + 30) + pool = load_pool("xai-oauth") + entry = PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=near_expiry, + refresh_token="rt", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + pool.add_entry(entry) + assert XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS > 30 + assert pool._entry_needs_refresh(entry) is True + + +def test_pool_entry_no_refresh_for_fresh_jwt(tmp_path, monkeypatch): + """A fresh JWT beyond the skew window must NOT trigger proactive refresh.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + fresh = _jwt_with_exp(int(time.time()) + 3600) + pool = load_pool("xai-oauth") + entry = PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=fresh, + refresh_token="rt", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + pool.add_entry(entry) + assert pool._entry_needs_refresh(entry) is False + + +def test_pool_select_proactively_refreshes_expiring_token(tmp_path, monkeypatch): + """End-to-end: pool.select() with refresh=True on an expiring entry must + return the refreshed token. 
This is the proactive path that runs BEFORE + the API call — separate from the 401-reactive path.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + near_expiry = _jwt_with_exp(int(time.time()) + 30) + new_access = _jwt_with_exp(int(time.time()) + 3600) + + refresh_calls = {"count": 0} + + def _fake_refresh(access_token, refresh_token, **kwargs): + refresh_calls["count"] += 1 + assert refresh_token == "rt-old" + return { + "access_token": new_access, + "refresh_token": "rt-new", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T01:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=near_expiry, + refresh_token="rt-old", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + ) + + selected = pool.select() + assert refresh_calls["count"] == 1 + assert selected is not None + assert selected.access_token == new_access + assert selected.refresh_token == "rt-new" + + +def test_pool_try_refresh_current_handles_xai_oauth(tmp_path, monkeypatch): + """The reactive 401-recovery path uses pool.try_refresh_current(). 
This + must work for xai-oauth alongside openai-codex — otherwise mid-call + expirations get propagated as hard failures instead of being retried with + fresh tokens.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Even a "fresh-looking" token gets force-refreshed via try_refresh_current. + # We simulate the scenario where the server rejected the token (401) + # despite client-side expiry math saying it's still valid (e.g. clock + # skew, server-side revocation, token bound to a session that expired). + seemingly_fresh = _jwt_with_exp(int(time.time()) + 3600) + new_access = _jwt_with_exp(int(time.time()) + 7200) + + def _fake_refresh(access_token, refresh_token, **kwargs): + return { + "access_token": new_access, + "refresh_token": "rt-rotated", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T02:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=seemingly_fresh, + refresh_token="rt-old", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + ) + pool.select() + refreshed = pool.try_refresh_current() + assert refreshed is not None + assert refreshed.access_token == new_access + assert refreshed.refresh_token == "rt-rotated" + + +def test_pool_refresh_marks_entry_exhausted_on_failure(tmp_path, monkeypatch): + """When the xAI refresh endpoint rejects the refresh_token (e.g. 
consumed + by another process, revoked), the pool must surface the failure cleanly + rather than silently retaining stale tokens. This is critical for the + failover path — _recover_with_credential_pool rotates to the next entry + only if try_refresh_current returns None.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + from hermes_cli.auth import AuthError + import uuid + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + def _fake_refresh_fail(*args, **kwargs): + raise AuthError("refresh_token_reused", code="xai_refresh_failed", relogin_required=True) + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh_fail) + + pool = load_pool("xai-oauth") + seemingly_fresh = _jwt_with_exp(int(time.time()) + 3600) + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="test", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=seemingly_fresh, + refresh_token="rt-revoked", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + ) + pool.select() + refreshed = pool.try_refresh_current() + # Refresh failure must return None so the caller falls through to + # credential rotation / friendly error display. 
+ assert refreshed is None + + +def test_pool_seeded_entry_sync_back_after_refresh(tmp_path, monkeypatch): + """When an entry seeded from the singleton (source='loopback_pkce') + is refreshed by the pool, the new tokens must be written back so a + fresh process load doesn't re-seed the now-consumed refresh token.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + near_expiry = _jwt_with_exp(int(time.time()) + 30) + _setup_hermes_auth(hermes_home, access_token=near_expiry, refresh_token="rt-singleton") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + new_access = _jwt_with_exp(int(time.time()) + 3600) + + def _fake_refresh(access_token, refresh_token, **kwargs): + assert refresh_token == "rt-singleton" + return { + "access_token": new_access, + "refresh_token": "rt-rotated", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T03:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + selected = pool.select() + assert selected is not None + assert selected.access_token == new_access + + raw = json.loads((hermes_home / "auth.json").read_text()) + tokens = raw["providers"]["xai-oauth"]["tokens"] + assert tokens["access_token"] == new_access + assert tokens["refresh_token"] == "rt-rotated" + + +def test_pool_refresh_adopts_singleton_tokens_when_consumed_elsewhere(tmp_path, monkeypatch): + """Multi-process race: another Hermes process refreshed the singleton + (rotating the refresh_token) while this process held a stale in-memory + pool entry. ``_refresh_entry`` must adopt the fresher singleton tokens + BEFORE spending its own (now-consumed) refresh_token, otherwise the + refresh POST would replay the consumed token and fail with + ``refresh_token_reused``. 
+ + Mirrors the proactive sync codex/nous already perform for the same + reason, and is what makes the pool actually safe to share across + profiles + Hermes processes.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + in_memory_at = _jwt_with_exp(int(time.time()) + 30) # near-expiry + _setup_hermes_auth(hermes_home, access_token=in_memory_at, refresh_token="rt-stale") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Load the pool once so the in-memory entry is seeded with rt-stale. + pool = load_pool("xai-oauth") + + # Now simulate "another process refreshed the tokens" by overwriting + # the singleton on disk WITHOUT touching this process's pool object. + other_process_at = _jwt_with_exp(int(time.time()) + 3600) + raw = json.loads((hermes_home / "auth.json").read_text()) + raw["providers"]["xai-oauth"]["tokens"] = { + "access_token": other_process_at, + "refresh_token": "rt-rotated-by-other-process", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + } + (hermes_home / "auth.json").write_text(json.dumps(raw)) + + refresh_calls = {"refresh_token_seen": None} + final_at = _jwt_with_exp(int(time.time()) + 7200) + + def _fake_refresh(access_token, refresh_token, **kwargs): + # The pool MUST have adopted the rotated token from auth.json before + # POSTing the refresh — otherwise it would replay the stale one. 
+ refresh_calls["refresh_token_seen"] = refresh_token + return { + "access_token": final_at, + "refresh_token": "rt-final", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T05:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + selected = pool.select() + assert selected is not None + assert refresh_calls["refresh_token_seen"] == "rt-rotated-by-other-process" + assert selected.access_token == final_at + + +def test_pool_refresh_recovers_when_other_process_already_refreshed(tmp_path, monkeypatch): + """Variant of the multi-process race where the other process refreshes + BETWEEN our proactive sync and the HTTP POST. Our refresh fails with a + consumed-token error; we must re-check auth.json, find the fresh pair + (written by the racing process), and adopt it instead of marking the + entry exhausted.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + in_memory_at = _jwt_with_exp(int(time.time()) + 30) + _setup_hermes_auth(hermes_home, access_token=in_memory_at, refresh_token="rt-shared") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("xai-oauth") + + other_process_at = _jwt_with_exp(int(time.time()) + 3600) + + def _fake_refresh(access_token, refresh_token, **kwargs): + # Simulate the racing process winning at the auth server right + # before our POST: by the time we reach this call, auth.json + # already holds the fresher pair, but we POSTed with rt-shared. 
+ raw = json.loads((hermes_home / "auth.json").read_text()) + raw["providers"]["xai-oauth"]["tokens"] = { + "access_token": other_process_at, + "refresh_token": "rt-rotated", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + } + (hermes_home / "auth.json").write_text(json.dumps(raw)) + raise AuthError( + "refresh_token_reused", + provider="xai-oauth", + code="xai_refresh_failed", + relogin_required=True, + ) + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + selected = pool.select() + # Even though refresh_xai_oauth_pure raised, the post-failure + # recovery path should adopt the fresher singleton tokens. + assert selected is not None + assert selected.access_token == other_process_at + assert selected.refresh_token == "rt-rotated" + + +def test_pool_exhausted_xai_entry_recovers_after_singleton_refresh(tmp_path, monkeypatch): + """When a singleton-seeded entry is parked as STATUS_EXHAUSTED and the + user runs ``hermes model`` -> xAI Grok OAuth (or another process + refreshes), the next ``_available_entries`` pass must adopt the fresh + auth.json tokens instead of leaving the entry frozen until the + cooldown elapses. Mirrors the codex/nous self-heal pattern.""" + from agent.credential_pool import load_pool, STATUS_EXHAUSTED + from dataclasses import replace + + hermes_home = tmp_path / "hermes" + stale_at = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=stale_at, refresh_token="rt-stale") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("xai-oauth") + seeded = pool.entries()[0] + assert seeded.source == "loopback_pkce" + + # Park the seeded entry as exhausted with a far-future cooldown so + # without resync it would never be selectable. 
+ exhausted = replace( + seeded, + last_status=STATUS_EXHAUSTED, + last_status_at=time.time(), + last_error_code=401, + last_error_reset_at=time.time() + 3600, # 1h cooldown + ) + pool._replace_entry(seeded, exhausted) + pool._persist() + assert pool.has_credentials() + assert not pool.has_available() # cooldown blocks everything + + # Simulate the user re-running `hermes model` -> xAI Grok OAuth: the + # singleton now has fresh tokens. + fresh_at = _jwt_with_exp(int(time.time()) + 7200) + raw = json.loads((hermes_home / "auth.json").read_text()) + raw["providers"]["xai-oauth"]["tokens"] = { + "access_token": fresh_at, + "refresh_token": "rt-fresh", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + } + (hermes_home / "auth.json").write_text(json.dumps(raw)) + + # _available_entries must sync from the singleton, lifting the + # exhausted state for the seeded entry. + available = pool._available_entries(clear_expired=True, refresh=False) + assert len(available) == 1 + assert available[0].access_token == fresh_at + assert available[0].refresh_token == "rt-fresh" + assert available[0].last_status != STATUS_EXHAUSTED + + +def test_pool_manual_xai_entry_not_synced_from_singleton(tmp_path, monkeypatch): + """Sync from the singleton must apply ONLY to the singleton-seeded + entry (source='loopback_pkce'). Manually added entries (e.g. 
via + ``hermes auth add xai-oauth``) own their own refresh-token lifecycle + and must not be silently overwritten when the user logs in via + ``hermes model``.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + singleton_at = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=singleton_at, refresh_token="rt-singleton") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("xai-oauth") + + manual_at_old = _jwt_with_exp(int(time.time()) + 30) + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="manual", + auth_type=AUTH_TYPE_OAUTH, + priority=1, + source="manual:xai_pkce", + access_token=manual_at_old, + refresh_token="rt-manual", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + ) + manual_entry = next(e for e in pool.entries() if e.source == "manual:xai_pkce") + synced = pool._sync_xai_oauth_entry_from_auth_store(manual_entry) + # Same object — no sync happened. + assert synced is manual_entry + assert synced.access_token == manual_at_old + assert synced.refresh_token == "rt-manual" + + +def test_pool_manual_entry_does_not_sync_back_to_singleton(tmp_path, monkeypatch): + """`hermes auth add xai-oauth` entries (source='manual:xai_pkce') are + independent credentials and must NOT write to the singleton. Sync-back + is restricted to entries seeded from the singleton. Otherwise adding a + second pool credential would silently overwrite the user's main login.""" + from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential + import uuid + + hermes_home = tmp_path / "hermes" + # Singleton has its own tokens (separate login). 
+ singleton_at = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=singleton_at, refresh_token="rt-singleton") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + manual_at_old = _jwt_with_exp(int(time.time()) + 30) + manual_at_new = _jwt_with_exp(int(time.time()) + 7200) + + def _fake_refresh(access_token, refresh_token, **kwargs): + assert refresh_token == "rt-manual" + return { + "access_token": manual_at_new, + "refresh_token": "rt-manual-new", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T04:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + pool.add_entry( + PooledCredential( + provider="xai-oauth", + id=uuid.uuid4().hex[:6], + label="manual", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token=manual_at_old, + refresh_token="rt-manual", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + ) + ) + # Refresh the manual entry — singleton must be left alone. + manual_entries = [e for e in pool.entries() if e.source == "manual:xai_pkce"] + assert len(manual_entries) == 1 + pool._refresh_entry(manual_entries[0], force=True) + + raw = json.loads((hermes_home / "auth.json").read_text()) + tokens = raw["providers"]["xai-oauth"]["tokens"] + # Singleton must be untouched — manual refresh shouldn't leak across. 
+ assert tokens["access_token"] == singleton_at + assert tokens["refresh_token"] == "rt-singleton" + + +# --------------------------------------------------------------------------- +# Auxiliary client routing +# --------------------------------------------------------------------------- + + +def test_auxiliary_client_routes_xai_oauth_through_responses_api(tmp_path, monkeypatch): + """Without explicit xai-oauth handling in ``resolve_provider_client``, an + xai-oauth main provider falls through to the generic ``oauth_external`` + arm and returns ``(None, None)`` — silently re-routing every auxiliary + task (compression, curator, web extract, session search, ...) to + whatever Step-2 fallback chain the user has configured (OpenRouter, + Nous, etc.). Users on xAI Grok OAuth would then see surprise charges + on those side providers for side tasks they thought were running on + their xAI subscription. + + Pin the routing contract: ``resolve_provider_client("xai-oauth", model)`` + must return a non-None client wrapping the xAI Responses API.""" + from agent.auxiliary_client import ( + CodexAuxiliaryClient, + resolve_provider_client, + ) + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False) + monkeypatch.delenv("XAI_BASE_URL", raising=False) + + client, model = resolve_provider_client("xai-oauth", model="grok-4") + assert client is not None, ( + "xai-oauth must route to a Responses-API client; falling through to " + "the generic oauth_external branch silently swaps providers for " + "every auxiliary task." + ) + assert isinstance(client, CodexAuxiliaryClient) + assert model == "grok-4" + # The wrapper preserves base_url + api_key so async wrappers and cache + # eviction can introspect them. Pin both to the live xAI runtime. 
+ assert str(client.base_url).rstrip("/") == DEFAULT_XAI_OAUTH_BASE_URL + assert client.api_key == fresh + + +def test_auxiliary_client_xai_oauth_returns_none_when_unauthenticated(tmp_path, monkeypatch): + """No xAI OAuth tokens in the auth store → ``resolve_provider_client`` + must return ``(None, None)`` so ``_resolve_auto`` falls through to the + next provider in the chain instead of crashing or constructing a + misconfigured client.""" + from agent.auxiliary_client import resolve_provider_client + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}})) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + client, model = resolve_provider_client("xai-oauth", model="grok-4") + assert client is None + assert model is None + + +def test_auxiliary_client_xai_oauth_requires_explicit_model(tmp_path, monkeypatch): + """xAI's Responses API has no safe "cheap aux model" default — + pinning one would silently rot the same way Codex's did. Callers + must pass an explicit model (auxiliary..model in config.yaml).""" + from agent.auxiliary_client import resolve_provider_client + + hermes_home = tmp_path / "hermes" + fresh = _jwt_with_exp(int(time.time()) + 3600) + _setup_hermes_auth(hermes_home, access_token=fresh) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + client, model = resolve_provider_client("xai-oauth", model=None) + assert client is None + assert model is None + + +# --------------------------------------------------------------------------- +# active_provider preservation on pool sync-back +# --------------------------------------------------------------------------- + + +def test_pool_sync_back_preserves_active_provider(tmp_path, monkeypatch): + """A token-rotation sync-back is a side effect of refresh, not the user + picking a provider. 
``_save_provider_state`` flips ``active_provider``; + using it on the sync-back path means every xAI/Codex/Nous refresh in a + multi-provider setup silently overrides the user's chosen active + provider (visible to ``hermes auth status``, ``hermes setup``, and the + ``hermes`` no-arg dispatcher). Pin the ``set_active=False`` contract so + no future refactor regresses to the legacy semantic.""" + from agent.credential_pool import load_pool + + hermes_home = tmp_path / "hermes" + near_expiry = _jwt_with_exp(int(time.time()) + 30) + _setup_hermes_auth(hermes_home, access_token=near_expiry, refresh_token="rt-xai") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Simulate a multi-provider user whose actual chosen provider is + # OpenRouter — xai-oauth tokens exist in the singleton but are NOT + # the active provider. + raw = json.loads((hermes_home / "auth.json").read_text()) + raw["active_provider"] = "openrouter" + (hermes_home / "auth.json").write_text(json.dumps(raw)) + + new_access = _jwt_with_exp(int(time.time()) + 3600) + + def _fake_refresh(access_token, refresh_token, **kwargs): + return { + "access_token": new_access, + "refresh_token": "rt-rotated", + "id_token": "", + "expires_in": 3600, + "token_type": "Bearer", + "last_refresh": "2026-05-15T10:00:00Z", + } + + monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh) + + pool = load_pool("xai-oauth") + selected = pool.select() + assert selected is not None + assert selected.access_token == new_access + + # The refresh wrote new tokens back into the singleton — the user's + # prior ``active_provider`` choice (openrouter) MUST survive. + raw_after = json.loads((hermes_home / "auth.json").read_text()) + assert raw_after["active_provider"] == "openrouter", ( + "pool sync-back must not flip active_provider; otherwise xAI/Codex/" + "Nous token rotations silently take over multi-provider users' " + "auth.json `active_provider` flag." 
+ ) + # Tokens were actually written so the next process won't replay the + # consumed refresh_token (preserves the original sync-back fix). + state = raw_after["providers"]["xai-oauth"]["tokens"] + assert state["access_token"] == new_access + assert state["refresh_token"] == "rt-rotated" diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py index b5cfdf16a9b..88ce31813e4 100644 --- a/tests/plugins/image_gen/test_xai_provider.py +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -72,10 +72,13 @@ class TestXAIImageGenProvider: provider = XAIImageGenProvider() schema = provider.get_setup_schema() - assert schema["name"] == "xAI (Grok)" + assert schema["name"] == "xAI Grok Imagine (image)" assert schema["badge"] == "paid" - assert len(schema["env_vars"]) == 1 - assert schema["env_vars"][0]["key"] == "XAI_API_KEY" + # Auth resolution is delegated to the shared "xai_grok" post_setup + # hook so the picker doesn't blindly prompt for XAI_API_KEY when the + # user is already signed in via xAI Grok OAuth. + assert schema["env_vars"] == [] + assert schema["post_setup"] == "xai_grok" # --------------------------------------------------------------------------- diff --git a/tests/plugins/video_gen/test_xai_plugin.py b/tests/plugins/video_gen/test_xai_plugin.py index 25695d852e5..bd7a880fdee 100644 --- a/tests/plugins/video_gen/test_xai_plugin.py +++ b/tests/plugins/video_gen/test_xai_plugin.py @@ -54,6 +54,50 @@ def test_xai_generate_requires_xai_key(monkeypatch): assert result["error_type"] == "auth_required" +def test_xai_available_with_oauth_only(monkeypatch): + """The plugin must honour xAI Grok OAuth credentials, not just + XAI_API_KEY. Otherwise the agent's tool-availability check filters + ``video_generate`` out of the toolbelt and the agent silently falls + back to whatever skill advertises video generation (e.g. comfyui). 
+ """ + import plugins.video_gen.xai as xai_plugin + + monkeypatch.delenv("XAI_API_KEY", raising=False) + monkeypatch.setattr( + "tools.xai_http.resolve_xai_http_credentials", + lambda: { + "provider": "xai-oauth", + "api_key": "oauth-bearer-token", + "base_url": "https://api.x.ai/v1", + }, + ) + + assert xai_plugin.XAIVideoGenProvider().is_available() is True + + +def test_xai_resolved_credentials_threaded_through_request(monkeypatch): + """OAuth-resolved creds must reach the HTTP layer — bug class where + ``is_available()`` says yes but the request still hits with no key. + """ + import plugins.video_gen.xai as xai_plugin + + monkeypatch.delenv("XAI_API_KEY", raising=False) + monkeypatch.setattr( + "tools.xai_http.resolve_xai_http_credentials", + lambda: { + "provider": "xai-oauth", + "api_key": "oauth-bearer-token", + "base_url": "https://api.x.ai/v1", + }, + ) + + api_key, base_url = xai_plugin._resolve_xai_credentials() + assert api_key == "oauth-bearer-token" + assert base_url == "https://api.x.ai/v1" + headers = xai_plugin._xai_headers(api_key) + assert headers["Authorization"] == "Bearer oauth-bearer-token" + + def test_xai_no_operation_kwarg(): """The ABC's generate() signature no longer accepts 'operation'. 
Passing it through **kwargs should be ignored (forward-compat).""" diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 47c491c441c..8cc02629523 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -578,6 +578,197 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): assert result["final_response"] == "Recovered after refresh" +def _build_xai_oauth_agent(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="grok-code-fast-1", + provider="xai-oauth", + api_mode="codex_responses", + base_url="https://api.x.ai/v1", + api_key="xai-oauth-token", + quiet_mode=True, + max_iterations=4, + skip_context_files=True, + skip_memory=True, + ) + agent._cleanup_task_resources = lambda task_id: None + agent._persist_session = lambda messages, history=None: None + agent._save_trajectory = lambda messages, user_message, completed: None + agent._save_session_log = lambda messages: None + return agent + + +def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch): + """xai-oauth + codex_responses must route prompt caching via the + ``prompt_cache_key`` body field on /v1/responses (xAI's documented + Responses-API cache key — see docs.x.ai prompt-caching/maximizing- + cache-hits). + + We pass it through ``extra_body`` rather than as a top-level kwarg so + the body field is serialized into JSON regardless of whether the + installed openai SDK build still accepts ``prompt_cache_key`` on + ``Responses.stream()``. Older or trimmed SDK builds drop it from the + signature and would otherwise raise ``TypeError`` before the request + reaches api.x.ai. 
The ``x-grok-conv-id`` header is retained as a + belt-and-braces fallback for clients/proxies that route on headers.""" + agent = _build_xai_oauth_agent(monkeypatch) + kwargs = agent._build_api_kwargs( + [ + {"role": "system", "content": "You are Hermes."}, + {"role": "user", "content": "Ping"}, + ] + ) + + assert kwargs.get("model") == "grok-code-fast-1" + # Top-level kwarg must NOT be set — that's the openai SDK + # incompatibility this whole indirection exists to dodge. + assert "prompt_cache_key" not in kwargs + extra_body = kwargs.get("extra_body") or {} + assert extra_body.get("prompt_cache_key"), ( + "xAI prompt-cache routing must travel via extra_body.prompt_cache_key " + "for /v1/responses — body field is the documented surface." + ) + headers = kwargs.get("extra_headers") or {} + assert "x-grok-conv-id" in headers, ( + "x-grok-conv-id header kept as belt-and-braces fallback for clients " + "that route on headers." + ) + + +def test_run_conversation_xai_oauth_refreshes_after_401_and_retries(monkeypatch): + """xai-oauth speaks the Responses API just like codex. 
When the access + token is rejected mid-call (401), the same proactive refresh-and-retry + handler that fires for openai-codex must also fire for xai-oauth — the + bug it caught: the gating condition checked only ``provider == "openai-codex"``, + so xai-oauth 401s leaked straight to non-retryable abort path with no + chance to swap in a freshly refreshed access token.""" + agent = _build_xai_oauth_agent(monkeypatch) + calls = {"api": 0, "refresh": 0} + + class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered after xAI refresh") + + def _fake_refresh(*, force=True): + calls["refresh"] += 1 + assert force is True + return True + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh) + + result = agent.run_conversation("Say OK") + + assert calls["api"] == 2 + assert calls["refresh"] == 1 + assert result["completed"] is True + assert result["final_response"] == "Recovered after xAI refresh" + + +def test_try_refresh_codex_client_credentials_handles_xai_oauth(monkeypatch): + """``_try_refresh_codex_client_credentials`` must rebuild the OpenAI + client with freshly resolved xAI OAuth credentials when the active + provider is xai-oauth. 
The function name is shared between codex and + xai-oauth (both speak codex_responses) — covering both cases prevents + silent regressions where the function gets gated to a single provider.""" + agent = _build_xai_oauth_agent(monkeypatch) + closed = {"value": False} + rebuilt = {"kwargs": None} + + class _ExistingClient: + def close(self): + closed["value"] = True + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["kwargs"] = kwargs + return _RebuiltClient() + + def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_): + # The pre-refresh guard reads the singleton with refresh_if_expiring=False + # to verify that the agent's active key still matches; the actual + # refresh later passes force_refresh=True. Both calls must succeed. + return { + "api_key": "fresh-xai-token" if force_refresh else agent.api_key, + "base_url": "https://api.x.ai/v1", + } + + monkeypatch.setattr( + "hermes_cli.auth.resolve_xai_oauth_runtime_credentials", + _fake_resolve, + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + agent.client = _ExistingClient() + ok = agent._try_refresh_codex_client_credentials(force=True) + + assert ok is True + assert closed["value"] is True + assert rebuilt["kwargs"]["api_key"] == "fresh-xai-token" + assert rebuilt["kwargs"]["base_url"] == "https://api.x.ai/v1" + assert isinstance(agent.client, _RebuiltClient) + assert agent.api_key == "fresh-xai-token" + + +def test_try_refresh_codex_client_credentials_skips_xai_oauth_when_singleton_differs(monkeypatch): + """An xai-oauth agent constructed with a non-singleton credential + (e.g. a manual pool entry whose tokens belong to a different account + than the loopback_pkce singleton, or an explicit ``api_key=`` arg) + MUST NOT silently adopt the singleton's tokens on a 401 reactive + refresh. Otherwise a 401 mid-conversation would re-route the rest + of the conversation onto a different account, with no user feedback. 
+ + The credential pool's reactive recovery is the right channel for + pool-managed credentials; this fallback path is for the singleton- + only case and must short-circuit when the active key differs.""" + agent = _build_xai_oauth_agent(monkeypatch) + # Agent is using "xai-oauth-token" (per the builder); singleton holds + # a *different* account's token. No force_refresh should fire. + refresh_calls = {"count": 0} + + def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_): + if force_refresh: + refresh_calls["count"] += 1 + return { + "api_key": "singleton-account-token", + "base_url": "https://api.x.ai/v1", + } + # The pre-refresh guard read — return the singleton's view of the + # singleton's token, which is NOT what the agent is currently using. + return { + "api_key": "singleton-account-token", + "base_url": "https://api.x.ai/v1", + } + + monkeypatch.setattr( + "hermes_cli.auth.resolve_xai_oauth_runtime_credentials", + _fake_resolve, + ) + + pre_refresh_key = agent.api_key + ok = agent._try_refresh_codex_client_credentials(force=True) + + assert ok is False, ( + "must not refresh when the active credential isn't the singleton; " + "otherwise the conversation silently swaps accounts mid-flight." + ) + assert refresh_calls["count"] == 0, ( + "force_refresh must not run — that would mutate the singleton's " + "tokens on disk and consume its single-use refresh_token for an " + "agent that wasn't even using the singleton." + ) + assert agent.api_key == pre_refresh_key + + def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch): agent = _build_copilot_agent(monkeypatch) calls = {"api": 0, "refresh": 0} @@ -624,12 +815,18 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): rebuilt["kwargs"] = kwargs return _RebuiltClient() + def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_): + # Pre-refresh guard reads the singleton (refresh_if_expiring=False). 
+ # It must report the agent's current api_key so the equality check + # passes; only then does the actual force_refresh run. + return { + "api_key": "new-codex-token" if force_refresh else agent.api_key, + "base_url": "https://chatgpt.com/backend-api/codex", + } + monkeypatch.setattr( "hermes_cli.auth.resolve_codex_runtime_credentials", - lambda force_refresh=True: { - "api_key": "new-codex-token", - "base_url": "https://chatgpt.com/backend-api/codex", - }, + _fake_resolve, ) monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 942fba01120..6f6d2f8c2a3 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -266,10 +266,12 @@ def _get_provider(stt_config: dict) -> str: return "none" if provider == "xai": - if get_env_value("XAI_API_KEY"): + from tools.xai_http import resolve_xai_http_credentials + + if resolve_xai_http_credentials().get("api_key"): return "xai" logger.warning( - "STT provider 'xai' configured but XAI_API_KEY not set" + "STT provider 'xai' configured but no xAI credentials are available" ) return "none" @@ -289,9 +291,14 @@ def _get_provider(stt_config: dict) -> str: if _HAS_OPENAI and _has_openai_audio_backend(): logger.info("No local STT available, using OpenAI Whisper API") return "openai" - if get_env_value("XAI_API_KEY"): - logger.info("No local STT available, using xAI Grok STT API") - return "xai" + try: + from tools.xai_http import resolve_xai_http_credentials + + if resolve_xai_http_credentials().get("api_key"): + logger.info("No local STT available, using xAI Grok STT API") + return "xai" + except Exception: + pass return "none" # --------------------------------------------------------------------------- @@ -704,14 +711,22 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]: Supports Inverse Text Normalization, diarization, and word-level timestamps. Requires ``XAI_API_KEY`` environment variable. 
""" - api_key = get_env_value("XAI_API_KEY") + from tools.xai_http import resolve_xai_http_credentials + + creds = resolve_xai_http_credentials() + api_key = str(creds.get("api_key") or "").strip() if not api_key: - return {"success": False, "transcript": "", "error": "XAI_API_KEY not set"} + return { + "success": False, + "transcript": "", + "error": "No xAI credentials found. Configure xAI OAuth in `hermes model` or set XAI_API_KEY", + } stt_config = _load_stt_config() xai_config = stt_config.get("xai", {}) base_url = str( xai_config.get("base_url") + or creds.get("base_url") or get_env_value("XAI_STT_BASE_URL") or XAI_STT_BASE_URL ).strip().rstrip("/") @@ -872,7 +887,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A "No STT provider available. Install faster-whisper for free local " f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, " "set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral " - "Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY " + "Voxtral Transcribe, configure xAI OAuth or set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY " "or OPENAI_API_KEY for the OpenAI Whisper API." 
), } diff --git a/tools/tts_tool.py b/tools/tts_tool.py index 9f0d272dac0..57907f76833 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -9,7 +9,7 @@ Built-in TTS providers: - MiniMax TTS: High-quality with voice cloning, needs MINIMAX_API_KEY - Mistral (Voxtral TTS): Multilingual, native Opus, needs MISTRAL_API_KEY - Google Gemini TTS: Controllable, 30 prebuilt voices, needs GEMINI_API_KEY -- xAI TTS: Grok voices, needs XAI_API_KEY +- xAI TTS: Grok voices, uses xAI Grok OAuth credentials or XAI_API_KEY - NeuTTS (local, free, no API key): On-device TTS via neutts - KittenTTS (local, free, no API key): On-device 25MB model - Piper (local, free, no API key): OHF-Voice/piper1-gpl neural VITS, 44 languages @@ -902,9 +902,12 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) - """ import requests - api_key = (get_env_value("XAI_API_KEY") or "").strip() + from tools.xai_http import resolve_xai_http_credentials + + creds = resolve_xai_http_credentials() + api_key = str(creds.get("api_key") or "").strip() if not api_key: - raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/") + raise ValueError("No xAI credentials found. 
Configure xAI OAuth in `hermes model` or set XAI_API_KEY.") xai_config = tts_config.get("xai", {}) voice_id = str(xai_config.get("voice_id", DEFAULT_XAI_VOICE_ID)).strip() or DEFAULT_XAI_VOICE_ID @@ -913,6 +916,7 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) - bit_rate = int(xai_config.get("bit_rate", DEFAULT_XAI_BIT_RATE)) base_url = str( xai_config.get("base_url") + or creds.get("base_url") or get_env_value("XAI_BASE_URL") or DEFAULT_XAI_BASE_URL ).strip().rstrip("/") @@ -1917,8 +1921,13 @@ def check_tts_requirements() -> bool: pass if get_env_value("MINIMAX_API_KEY"): return True - if get_env_value("XAI_API_KEY"): - return True + try: + from tools.xai_http import resolve_xai_http_credentials + + if resolve_xai_http_credentials().get("api_key"): + return True + except Exception: + pass if get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY"): return True try: diff --git a/tools/xai_http.py b/tools/xai_http.py index b5bce97c2f4..fbb7961d244 100644 --- a/tools/xai_http.py +++ b/tools/xai_http.py @@ -2,6 +2,9 @@ from __future__ import annotations +import os +from typing import Dict + def hermes_xai_user_agent() -> str: """Return a stable Hermes-specific User-Agent for xAI HTTP calls.""" @@ -10,3 +13,49 @@ def hermes_xai_user_agent() -> str: except Exception: __version__ = "unknown" return f"Hermes-Agent/{__version__}" + + +def resolve_xai_http_credentials() -> Dict[str, str]: + """Resolve bearer credentials for direct xAI HTTP endpoints. + + Prefers Hermes-managed xAI OAuth credentials when available, then falls back + to ``XAI_API_KEY`` from the environment. This keeps direct xAI endpoints + (images, TTS, STT, etc.) aligned with the main runtime auth model. 
+ """ + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + + runtime = resolve_runtime_provider(requested="xai-oauth") + access_token = str(runtime.get("api_key") or "").strip() + base_url = str(runtime.get("base_url") or "").strip().rstrip("/") + if access_token: + return { + "provider": "xai-oauth", + "api_key": access_token, + "base_url": base_url or "https://api.x.ai/v1", + } + except Exception: + pass + + try: + from hermes_cli.auth import resolve_xai_oauth_runtime_credentials + + creds = resolve_xai_oauth_runtime_credentials() + access_token = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if access_token: + return { + "provider": "xai-oauth", + "api_key": access_token, + "base_url": base_url or "https://api.x.ai/v1", + } + except Exception: + pass + + api_key = os.getenv("XAI_API_KEY", "").strip() + base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") + return { + "provider": "xai", + "api_key": api_key, + "base_url": base_url, + } diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md new file mode 100644 index 00000000000..49c7087621a --- /dev/null +++ b/website/docs/guides/xai-grok-oauth.md @@ -0,0 +1,214 @@ +--- +sidebar_position: 16 +title: "xAI Grok OAuth (SuperGrok Subscription)" +description: "Sign in with your SuperGrok subscription to use Grok models in Hermes Agent — no API key required" +--- + +# xAI Grok OAuth (SuperGrok Subscription) + +Hermes Agent supports xAI Grok through a browser-based OAuth login flow against [accounts.x.ai](https://accounts.x.ai), using your existing **SuperGrok subscription**. No `XAI_API_KEY` is required — log in once and Hermes automatically refreshes your session in the background. + +The transport reuses the `codex_responses` adapter (xAI exposes a Responses-style endpoint), so reasoning, tool-calling, streaming, and prompt caching work without any adapter changes. 
+ +The same OAuth bearer token is also reused by every direct-to-xAI surface in Hermes — TTS, image generation, video generation, and transcription — so a single login covers all four. + +## Overview + +| Item | Value | +|------|-------| +| Provider ID | `xai-oauth` | +| Display name | xAI Grok OAuth (SuperGrok Subscription) | +| Auth type | Browser OAuth 2.0 PKCE (loopback callback) | +| Transport | xAI Responses API (`codex_responses`) | +| Default model | `grok-4.3` | +| Endpoint | `https://api.x.ai/v1` | +| Auth server | `https://accounts.x.ai` | +| Requires env var | No (`XAI_API_KEY` is **not** used for this provider) | +| Subscription | [SuperGrok](https://x.ai/grok) (any active tier) | + +## Prerequisites + +- Python 3.9+ +- Hermes Agent installed +- An active SuperGrok subscription on your xAI account +- A browser available on the local machine (or use `--no-browser` for remote sessions) + +## Quick Start + +```bash +# Launch the provider and model picker +hermes model +# → Select "xAI Grok OAuth (SuperGrok Subscription)" from the provider list +# → Hermes opens your browser to accounts.x.ai +# → Approve access in the browser +# → Pick a model (grok-4.3 is at the top) +# → Start chatting + +hermes +``` + +After the first login, credentials are stored under `~/.hermes/auth.json` and refreshed automatically before they expire. + +## Logging In Manually + +You can trigger a login without going through the model picker: + +```bash +hermes auth add xai-oauth +``` + +### Remote / headless sessions + +On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser. Open the URL on any device with a browser, complete the consent flow, and Hermes finishes the loopback exchange when the redirect comes back. + +If you need to force this behaviour explicitly: + +```bash +hermes auth add xai-oauth --no-browser +``` + +## How the Login Works + +1. 
Hermes opens your browser to `accounts.x.ai`. +2. You sign in (or confirm your existing session) and approve access. +3. xAI redirects back to Hermes and the tokens are saved to `~/.hermes/auth.json`. +4. From then on, Hermes refreshes the access token in the background — you stay signed in until you `hermes auth remove xai-oauth` or revoke access from your xAI account settings. + +## Checking Login Status + +```bash +hermes doctor +``` + +The `◆ Auth Providers` section will show the current state of every provider, including `xai-oauth`. + +## Switching Models + +```bash +hermes model +# → Select "xAI Grok OAuth (SuperGrok Subscription)" +# → Pick from the model list (grok-4.3 is pinned to the top) +``` + +Or set the model directly: + +```bash +hermes config set model.default grok-4.3 +hermes config set model.provider xai-oauth +``` + +## Configuration Reference + +After login, `~/.hermes/config.yaml` will contain: + +```yaml +model: + default: grok-4.3 + provider: xai-oauth + base_url: https://api.x.ai/v1 +``` + +### Provider aliases + +All of the following resolve to `xai-oauth`: + +```bash +hermes --provider xai-oauth # canonical +hermes --provider grok-oauth # alias +hermes --provider x-ai-oauth # alias +hermes --provider xai-grok-oauth # alias +``` + +## Direct-to-xAI Tools (TTS / Image / Video / Transcription) + +Once you're logged in via OAuth, every direct-to-xAI tool reuses the same bearer token automatically — there is **no separate setup** unless you'd rather use an API key. + +To pick a backend for each tool: + +```bash +hermes tools +# → Text-to-Speech → "xAI TTS" +# → Image Generation → "xAI Grok Imagine (image)" +# → Video Generation → "xAI Grok Imagine" +``` + +If OAuth tokens are already stored, the picker confirms it and skips the credential prompt. If neither OAuth nor `XAI_API_KEY` is set, the picker offers a 3-choice menu: OAuth login, paste API key, or skip. 
+ +:::note Video generation is off by default +The `video_gen` toolset is disabled by default. Enable it in `hermes tools` → `🎬 Video Generation` (press space) before the agent can call `video_generate`. Otherwise the agent may fall back to the bundled ComfyUI skill, which is also tagged for video generation. +::: + +### Models + +| Tool | Model | Notes | +|------|-------|-------| +| Chat | `grok-4.3` | Default; auto-selected when you log in via OAuth | +| Chat | `grok-4.20-0309-reasoning` | Reasoning variant | +| Chat | `grok-4.20-0309-non-reasoning` | Non-reasoning variant | +| Chat | `grok-4.20-multi-agent-0309` | Multi-agent variant | +| Image | `grok-imagine-image` | Default; ~5–10 s | +| Image | `grok-imagine-image-quality` | Higher fidelity; ~10–20 s | +| Video | `grok-imagine-video` | Text-to-video and image-to-video; up to 7 reference images | +| TTS | (default voice) | xAI `/v1/tts` endpoint | + +The chat catalog is derived live from the on-disk `models.dev` cache; new xAI releases appear automatically once that cache refreshes. `grok-4.3` is always pinned to the top of the list. + +## Environment Variables + +| Variable | Effect | +|----------|--------| +| `XAI_BASE_URL` | Override the default `https://api.x.ai/v1` endpoint (rarely needed). | +| `HERMES_INFERENCE_PROVIDER` | Force the active provider at runtime, e.g. `HERMES_INFERENCE_PROVIDER=xai-oauth hermes`. | + +## Troubleshooting + +### Token expired — not re-logging in automatically + +Hermes refreshes the token before each session and again reactively on a 401. If refresh fails with `invalid_grant` (the refresh token was revoked, or the account was rotated), Hermes surfaces a typed re-auth message instead of crashing. + +**Fix:** run `hermes auth add xai-oauth` again to start a fresh login. + +### Authorization timed out + +The loopback listener has a finite expiry window (default 180 s). If you don't approve the login in time, Hermes raises a timeout error. 
+ +**Fix:** re-run `hermes auth add xai-oauth` (or `hermes model`). The flow starts fresh. + +### State mismatch (possible CSRF) + +Hermes detected that the `state` value returned by the authorization server doesn't match what it sent. + +**Fix:** re-run the login. If it persists, check for a proxy or redirect that is modifying the OAuth response. + +### Logging in from a remote server + +On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. Open the URL on any device with a browser and complete the consent there — the loopback callback comes back to your remote host. + +You can also force this behaviour: + +```bash +hermes auth add xai-oauth --no-browser +``` + +### "No xAI credentials found" error at runtime + +The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't logged in yet, or the credential file was deleted. + +**Fix:** run `hermes model` and pick the xAI Grok OAuth provider, or run `hermes auth add xai-oauth`. + +## Logging Out + +To remove stored xAI Grok OAuth credentials: + +```bash +hermes auth remove xai-oauth +``` + +This clears both the singleton `loopback_pkce` entry in `auth.json` and any matching credential-pool rows. + +## See Also + +- [AI Providers reference](../integrations/providers.md) +- [Environment Variables](../reference/environment-variables.md) +- [Configuration](../user-guide/configuration.md) +- [Voice & TTS](../user-guide/features/tts.md) diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index af9e07814d7..e7b2e5ab86d 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -331,6 +331,8 @@ When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoin xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter needed, the server reasons by default. 
Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or drop `grok` as a shortcut into `/model grok-4-1-fast-reasoning`. +SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow. + When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history. No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations. @@ -1444,7 +1446,7 @@ fallback_model: When activated, the fallback swaps the model and provider mid-session without losing your conversation. The chain is tried entry-by-entry; activation is one-shot per session. -Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`. 
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `bedrock`, `ai-gateway`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`. :::tip Fallback is configured exclusively through `config.yaml` — or interactively via `hermes fallback`. For full details on when it triggers, how the chain advances, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). diff --git a/website/sidebars.ts b/website/sidebars.ts index a8d893d6e72..a0fb24b8c50 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -191,6 +191,7 @@ const sidebars: SidebarsConfig = { 'guides/migrate-from-openclaw', 'guides/aws-bedrock', 'guides/azure-foundry', + 'guides/xai-grok-oauth', 'guides/microsoft-graph-app-registration', 'guides/operate-teams-meeting-pipeline', ], From e4d7a5dffaa18676b8567469825c2082658d8557 Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 17:43:51 +0100 Subject: [PATCH 044/218] fix(tools): video_gen picker reflects active xAI selection and runs xai_grok post_setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs in the `hermes tools` reconfigure flow caused picking xAI Grok Imagine for video_gen (or image_gen) to feel like a no-op: 1. `_is_provider_active()` had a branch for `image_gen_plugin_name` but none for `video_gen_plugin_name`, so a row marked as the active xAI video provider was never recognized as active. 
The picker fell through to the env-var fallback in `_detect_active_provider_index()`, which matched the FAL row (because `FAL_KEY` is set), so the picker visually defaulted to FAL even though the user had selected xAI. 2. `_plugin_video_gen_providers()` and `_plugin_image_gen_providers()` built picker rows from the plugin's `get_setup_schema()` but only copied `name`, `badge`, `tag`, `env_vars`. The xAI plugins declare `post_setup: "xai_grok"` so the picker should run the OAuth / API-key prompt hook after selection — that key was silently dropped, so the hook never fired from the picker rows. Adds the missing `video_gen_plugin_name` branch (placed before the `managed_nous_feature` block, mirroring the existing image_gen branch) and propagates `post_setup` from the plugin schema into both picker-row builders. Adds focused tests in `test_video_gen_picker.py` and `test_image_gen_picker.py`. --- hermes_cli/tools_config.py | 43 ++++++----- tests/hermes_cli/test_image_gen_picker.py | 27 +++++++ tests/hermes_cli/test_video_gen_picker.py | 89 +++++++++++++++++++++++ 3 files changed, 141 insertions(+), 18 deletions(-) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 891ffdeb05a..377194589ea 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1505,15 +1505,16 @@ def _plugin_image_gen_providers() -> list[dict]: continue if not isinstance(schema, dict): continue - rows.append( - { - "name": schema.get("name", provider.display_name), - "badge": schema.get("badge", ""), - "tag": schema.get("tag", ""), - "env_vars": schema.get("env_vars", []), - "image_gen_plugin_name": provider.name, - } - ) + row = { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + if schema.get("post_setup"): + row["post_setup"] = schema["post_setup"] + rows.append(row) return rows @@ -1542,15 
+1543,16 @@ def _plugin_video_gen_providers() -> list[dict]: continue if not isinstance(schema, dict): continue - rows.append( - { - "name": schema.get("name", provider.display_name), - "badge": schema.get("badge", ""), - "tag": schema.get("tag", ""), - "env_vars": schema.get("env_vars", []), - "video_gen_plugin_name": provider.name, - } - ) + row = { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "video_gen_plugin_name": provider.name, + } + if schema.get("post_setup"): + row["post_setup"] = schema["post_setup"] + rows.append(row) return rows @@ -1814,6 +1816,11 @@ def _is_provider_active(provider: dict, config: dict) -> bool: image_cfg = config.get("image_gen", {}) return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name + video_plugin_name = provider.get("video_gen_plugin_name") + if video_plugin_name: + video_cfg = config.get("video_gen", {}) + return isinstance(video_cfg, dict) and video_cfg.get("provider") == video_plugin_name + managed_feature = provider.get("managed_nous_feature") if managed_feature: features = get_nous_subscription_features(config) diff --git a/tests/hermes_cli/test_image_gen_picker.py b/tests/hermes_cli/test_image_gen_picker.py index 6da847691a7..51eafd6da67 100644 --- a/tests/hermes_cli/test_image_gen_picker.py +++ b/tests/hermes_cli/test_image_gen_picker.py @@ -103,6 +103,33 @@ class TestPluginPickerInjection: visible = tools_config._visible_providers(browser, {}) assert all(p.get("image_gen_plugin_name") is None for p in visible) + def test_post_setup_propagated_when_declared(self, monkeypatch): + from hermes_cli import tools_config + + image_gen_registry.register_provider(_FakeProvider( + "xai_img", + schema={ + "name": "xAI Grok Imagine", + "badge": "paid", + "tag": "grok image", + "env_vars": [], + "post_setup": "xai_grok", + }, + )) + + rows = tools_config._plugin_image_gen_providers() + match 
= next(r for r in rows if r.get("image_gen_plugin_name") == "xai_img") + assert match["post_setup"] == "xai_grok" + + def test_post_setup_omitted_when_not_declared(self, monkeypatch): + from hermes_cli import tools_config + + image_gen_registry.register_provider(_FakeProvider("plain_img")) + + rows = tools_config._plugin_image_gen_providers() + match = next(r for r in rows if r.get("image_gen_plugin_name") == "plain_img") + assert "post_setup" not in match + class TestPluginCatalog: def test_plugin_catalog_returns_models(self): diff --git a/tests/hermes_cli/test_video_gen_picker.py b/tests/hermes_cli/test_video_gen_picker.py index 85350947c96..c06e2ea2096 100644 --- a/tests/hermes_cli/test_video_gen_picker.py +++ b/tests/hermes_cli/test_video_gen_picker.py @@ -146,3 +146,92 @@ class TestReconfigureWritesProvider: assert config["video_gen"]["provider"] == "noenv_video" assert config["video_gen"]["model"] == "noenv_video-video-v1" assert config["video_gen"]["use_gateway"] is False + + +class TestPluginVideoProvidersRow: + """Tests for _plugin_video_gen_providers row contents.""" + + def test_post_setup_propagated_when_declared(self, monkeypatch): + from hermes_cli import tools_config + + video_gen_registry.register_provider(_FakeVideoProvider( + "xai_video", + schema={ + "name": "xAI Grok Imagine", + "badge": "paid", + "tag": "grok video", + "env_vars": [], + "post_setup": "xai_grok", + }, + )) + + rows = tools_config._plugin_video_gen_providers() + match = next(r for r in rows if r.get("video_gen_plugin_name") == "xai_video") + assert match["post_setup"] == "xai_grok" + + def test_post_setup_omitted_when_not_declared(self, monkeypatch): + from hermes_cli import tools_config + + video_gen_registry.register_provider(_FakeVideoProvider("plain_video")) + + rows = tools_config._plugin_video_gen_providers() + match = next(r for r in rows if r.get("video_gen_plugin_name") == "plain_video") + assert "post_setup" not in match + + +class TestVideoPluginProviderActive: + 
"""Tests for _is_provider_active recognizing video_gen_plugin_name.""" + + def test_active_when_video_gen_provider_matches(self): + from hermes_cli import tools_config + + config = {"video_gen": {"provider": "xai"}} + row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"} + + assert tools_config._is_provider_active(row, config) is True + + def test_inactive_when_video_gen_provider_differs(self): + from hermes_cli import tools_config + + config = {"video_gen": {"provider": "fal"}} + row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"} + + assert tools_config._is_provider_active(row, config) is False + + def test_inactive_when_video_gen_section_missing(self): + from hermes_cli import tools_config + + row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"} + assert tools_config._is_provider_active(row, {}) is False + + def test_detect_active_index_picks_video_plugin_match(self, monkeypatch): + """When xAI is the configured video_gen provider, the picker should + default to the xAI row even if FAL_KEY happens to be set in env. + + Regression: previously _detect_active_provider_index() saw + _is_provider_active(xai) return False (no video_gen branch), + skipped xAI (empty env_vars), and matched the FAL row via the + env-var fallback — so the picker visually defaulted to FAL even + though the user picked xAI. The xAI row uses empty env_vars + because authentication is handled via xAI Grok OAuth (post_setup + hook). 
+ """ + from hermes_cli import tools_config + + monkeypatch.setattr( + tools_config, + "get_env_value", + lambda key: "fal-key" if key == "FAL_KEY" else "", + ) + + config = {"video_gen": {"provider": "xai"}} + providers = [ + {"name": "xAI Grok Imagine", "env_vars": [], "video_gen_plugin_name": "xai"}, + { + "name": "FAL.ai", + "env_vars": [{"key": "FAL_KEY", "prompt": "FAL"}], + "video_gen_plugin_name": "fal", + }, + ] + + assert tools_config._detect_active_provider_index(providers, config) == 0 From 9eef53b9605410ddc4fe1dfa79214a137787141c Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 17:44:27 +0100 Subject: [PATCH 045/218] chore(release): map Jaaneek@users.noreply.github.com to Jaaneek The contributor's commit author email is the legacy GitHub noreply form (no leading numeric "id+"), so it doesn't match the check-attribution workflow's auto-resolve regex (\+.*@users\.noreply\.github\.com). Register it explicitly in AUTHOR_MAP so the PR #26457 attribution check passes. 
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index f3df43c3fe1..740b79091b1 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1074,6 +1074,7 @@ AUTHOR_MAP = { "16034932+Arkmusn@users.noreply.github.com": "Arkmusn", # PR #25559 salvage (approvals.timeout from config) "nidhi2894@gmail.com": "nidhi-singh02", # PR #2752 salvage (slack whitespace-only IndexError guard) "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02", + "Jaaneek@users.noreply.github.com": "Jaaneek", # PR #26457 (xAI Grok OAuth provider) } From e13c1b806018427aaf5fbe4b0ff2c6ca6821d6db Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 18:27:54 +0100 Subject: [PATCH 046/218] fix(xai-http): preserve ~/.hermes/.env fallback and XAI_STT_BASE_URL precedence The new resolve_xai_http_credentials() resolver was using os.getenv() for the XAI_API_KEY/XAI_BASE_URL fallback path, which dropped the ~/.hermes/.env contract guarded by PR #17140 / #17163. Users with XAI_API_KEY in dotenv only would see "No xAI credentials found" even though the key was configured. Separately, _transcribe_xai started consulting creds["base_url"] (which always returns at least the default https://api.x.ai/v1) ahead of the public XAI_STT_BASE_URL env override, so the per-tool override stopped working. - tools/xai_http.py: add module-level get_env_value() wrapper that reads ~/.hermes/.env first (via hermes_cli.config.get_env_value), then os.environ. Resolver uses it for the API-key/base-url fallback. - tools/transcription_tools.py: restore precedence so XAI_STT_BASE_URL wins over creds["base_url"]. - tests/tools/test_transcription_dotenv_fallback.py + tests/tools/test_tts_dotenv_fallback.py: repoint the per-call-site patches at the new resolution point (tools.xai_http.get_env_value). The end-to-end regression-guard test (which patches load_env) is unchanged and still passes. 
--- .../test_transcription_dotenv_fallback.py | 13 +++++--- tests/tools/test_tts_dotenv_fallback.py | 7 ++++- tools/transcription_tools.py | 2 +- tools/xai_http.py | 30 ++++++++++++++++--- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py index 73e7a42a59b..a28c777a8f1 100644 --- a/tests/tools/test_transcription_dotenv_fallback.py +++ b/tests/tools/test_transcription_dotenv_fallback.py @@ -170,7 +170,15 @@ class TestTranscribeCallSitesReadDotenv: assert seen_keys == ["mistral-dotenv-key"] def test_transcribe_xai_forwards_dotenv_key(self): + """xAI STT now resolves credentials through ``tools.xai_http`` so the + OAuth bearer wins when present and ``XAI_API_KEY`` is the fallback. + Patch the resolver's ``get_env_value`` to simulate a dotenv-only key + and confirm it reaches the HTTP call. The per-call-site + ``transcription_tools.get_env_value`` is still consulted for the + ``XAI_STT_BASE_URL`` override (covered by ``test_custom_base_url``). + """ from tools import transcription_tools as tt + from tools import xai_http captured: dict = {} @@ -183,15 +191,12 @@ class TestTranscribeCallSitesReadDotenv: response.json.return_value = {"text": "hello"} return response - # get_env_value is consulted for both XAI_API_KEY and XAI_STT_BASE_URL. - # Return the key for the first call, None for base-url override - # (so it defaults to the module-level XAI_STT_BASE_URL). 
def fake_get_env_value(name, default=None): if name == "XAI_API_KEY": return "xai-dotenv-key" return None - with patch.object(tt, "get_env_value", side_effect=fake_get_env_value), \ + with patch.object(xai_http, "get_env_value", side_effect=fake_get_env_value), \ patch("requests.post", side_effect=fake_post), \ patch("builtins.open", MagicMock()): result = tt._transcribe_xai("/tmp/fake.mp3", "grok-stt") diff --git a/tests/tools/test_tts_dotenv_fallback.py b/tests/tools/test_tts_dotenv_fallback.py index 05083208709..0a4ea5a8ac2 100644 --- a/tests/tools/test_tts_dotenv_fallback.py +++ b/tests/tools/test_tts_dotenv_fallback.py @@ -57,7 +57,12 @@ class TestDotenvFallbackPerProvider: mock_import.return_value.assert_called_once_with(api_key="el-dotenv-key") def test_xai_reads_dotenv_key(self, tmp_path): + """xAI TTS now resolves credentials through ``tools.xai_http``; the + dotenv fallback contract from #17140 is preserved by patching the + resolver's ``get_env_value`` rather than ``tts_tool.get_env_value``. 
+ """ from tools import tts_tool + from tools import xai_http captured: dict = {} @@ -69,7 +74,7 @@ class TestDotenvFallbackPerProvider: response.raise_for_status = MagicMock() return response - with patch.object(tts_tool, "get_env_value", return_value="xai-dotenv-key"), \ + with patch.object(xai_http, "get_env_value", return_value="xai-dotenv-key"), \ patch("requests.post", side_effect=fake_post): tts_tool._generate_xai_tts("hi", str(tmp_path / "out.mp3"), {}) diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 6f6d2f8c2a3..d741530d358 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -726,8 +726,8 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]: xai_config = stt_config.get("xai", {}) base_url = str( xai_config.get("base_url") - or creds.get("base_url") or get_env_value("XAI_STT_BASE_URL") + or creds.get("base_url") or XAI_STT_BASE_URL ).strip().rstrip("/") language = str( diff --git a/tools/xai_http.py b/tools/xai_http.py index fbb7961d244..216a51ff10d 100644 --- a/tools/xai_http.py +++ b/tools/xai_http.py @@ -5,6 +5,25 @@ from __future__ import annotations import os from typing import Dict +try: + from hermes_cli.config import get_env_value as _hermes_get_env_value +except Exception: + _hermes_get_env_value = None + + +def get_env_value(name: str, default=None): + """Read ``name`` from ``~/.hermes/.env`` first, then ``os.environ``. + + Wraps :func:`hermes_cli.config.get_env_value` so tests can patch + ``tools.xai_http.get_env_value`` to inject dotenv-only secrets into the + xAI credential resolver. 
+ """ + if _hermes_get_env_value is not None: + value = _hermes_get_env_value(name) + if value is not None: + return value + return os.environ.get(name, default) + def hermes_xai_user_agent() -> str: """Return a stable Hermes-specific User-Agent for xAI HTTP calls.""" @@ -19,8 +38,11 @@ def resolve_xai_http_credentials() -> Dict[str, str]: """Resolve bearer credentials for direct xAI HTTP endpoints. Prefers Hermes-managed xAI OAuth credentials when available, then falls back - to ``XAI_API_KEY`` from the environment. This keeps direct xAI endpoints - (images, TTS, STT, etc.) aligned with the main runtime auth model. + to ``XAI_API_KEY`` resolved via ``hermes_cli.config.get_env_value`` so keys + stored in ``~/.hermes/.env`` (the standard Hermes location) are honored — + not just ones already exported into ``os.environ``. This keeps direct xAI + endpoints (images, TTS, STT, etc.) aligned with the main runtime auth model + and preserves the regression contract from PR #17140 / #17163. """ try: from hermes_cli.runtime_provider import resolve_runtime_provider @@ -52,8 +74,8 @@ def resolve_xai_http_credentials() -> Dict[str, str]: except Exception: pass - api_key = os.getenv("XAI_API_KEY", "").strip() - base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") + api_key = str(get_env_value("XAI_API_KEY") or "").strip() + base_url = str(get_env_value("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") return { "provider": "xai", "api_key": api_key, From 7fdc16dd4a281dad84a245ab9eed3be2f4a94264 Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 18:28:01 +0100 Subject: [PATCH 047/218] refactor(transports/codex): trim duplicated cache-key comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xAI prompt_cache_key block carried two long comment paragraphs that either restated setdefault semantics, narrated the SDK type-validation mechanism, or recapped the historical motivation for 
the extra_body indirection — all already covered by the test docstring at test_xai_responses_sends_cache_key_via_extra_body (which links to the xAI docs). Also restored the truncated link in the body-injection comment. No behavior change. --- agent/transports/codex.py | 20 ++++++------------- .../agent/transports/test_codex_transport.py | 5 ----- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 46169e971ba..cfd9f128778 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -102,11 +102,8 @@ class ResponsesApiTransport(ProviderTransport): kwargs["parallel_tool_calls"] = True session_id = params.get("session_id") - # xAI's Responses API uses `prompt_cache_key` (body-level) as the - # cache-routing key, not a top-level kwarg — the body-field - # injection below survives openai SDK builds whose - # Responses.stream() signature drops the kwarg. Everything else - # that ISN'T github/xAI keeps using the typed kwarg. + # xAI Responses takes prompt_cache_key in extra_body (set further + # down); GitHub Models opts out of cache-key routing entirely. if not is_github_responses and not is_xai_responses and session_id: kwargs["prompt_cache_key"] = session_id @@ -172,15 +169,10 @@ class ResponsesApiTransport(ProviderTransport): merged_extra_headers["x-grok-conv-id"] = session_id kwargs["extra_headers"] = merged_extra_headers - # xAI Responses cache-routing field. Lives in the request body - # (per https://docs.x.ai/.../prompt-caching/maximizing-cache-hits), - # so we ship it via extra_body — the openai SDK serializes - # extra_body fields into the JSON body without per-field type - # validation, sidestepping the TypeError that fires on - # Responses.stream() builds whose `prompt_cache_key` kwarg has - # been dropped. Setdefault preserves a caller-supplied value - # (e.g. request_overrides.extra_body.prompt_cache_key) over - # the auto-derived session_id. 
+ # xAI Responses cache-routing — body-level field per + # https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits. + # Sent via extra_body (not the typed kwarg) so it survives openai + # SDK builds whose Responses.stream() signature has dropped the field. existing_extra_body = kwargs.get("extra_body") merged_extra_body: Dict[str, Any] = {} if isinstance(existing_extra_body, dict): diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index 7100e8ac17d..ad70167b09f 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -117,13 +117,8 @@ class TestCodexBuildKwargs: session_id="conv-xai-1", is_xai_responses=True, ) - # Top-level prompt_cache_key must NOT be set for xAI — the SDK - # signature drop is what motivated the extra_body indirection in - # the first place. The cache-routing field must travel in the - # body via extra_body. assert "prompt_cache_key" not in kw assert kw.get("extra_body", {}).get("prompt_cache_key") == "conv-xai-1" - # Header kept as belt-and-braces. assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-xai-1" def test_xai_responses_extra_body_preserves_caller_fields(self, transport): From 1e4801b8d0c27c1d6f6f8ed14ace0d3045a0d695 Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 18:46:45 +0100 Subject: [PATCH 048/218] docs(xai-oauth): correct logout command (was hermes auth remove) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous "Logging Out" section showed `hermes auth remove xai-oauth` with no positional target — argparse rejects that and the command does not clear the singleton OAuth state anyway. The correct command for the "clear everything" intent is `hermes auth logout xai-oauth`. Also point users at `hermes auth remove xai-oauth <target>` for single-pool-row deletion.
--- website/docs/guides/xai-grok-oauth.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md index 49c7087621a..5afccb6d881 100644 --- a/website/docs/guides/xai-grok-oauth.md +++ b/website/docs/guides/xai-grok-oauth.md @@ -198,13 +198,13 @@ The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't ## Logging Out -To remove stored xAI Grok OAuth credentials: +To remove all stored xAI Grok OAuth credentials: ```bash -hermes auth remove xai-oauth +hermes auth logout xai-oauth ``` -This clears both the singleton `loopback_pkce` entry in `auth.json` and any matching credential-pool rows. +This clears both the singleton OAuth entry in `auth.json` and any credential-pool rows for `xai-oauth`. Use `hermes auth remove xai-oauth <target>` if you only want to drop a single pool entry (run `hermes auth list xai-oauth` to see them). ## See Also From 7d7cdd48e06b9bbf0fd4e030f6745e8b033e1adc Mon Sep 17 00:00:00 2001 From: Jaaneek Date: Fri, 15 May 2026 19:04:14 +0100 Subject: [PATCH 049/218] test(xai-oauth): use grok-4.3 instead of retiring grok-code-fast-1 Per @mark-xai's review on PR #26457 and the xAI model retirement on 2026-05-15: grok-code-fast-1 is being retired today and aliases redirect to grok-4.3 (already pinned to the top of the xAI model list by this PR). Update the two xAI Responses-API test fixtures Mark flagged plus the picker fallback default in hermes_cli/main.py that uses the same literal.
--- hermes_cli/main.py | 2 +- tests/run_agent/test_run_agent_codex_responses.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index c7ac1100816..c2c8a6880d2 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2887,7 +2887,7 @@ def _model_flow_xai_oauth(_config, current_model=""): pass models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or []) - selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-code-fast-1")) + selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3")) if selected: _save_model_choice(selected) _update_config_for_provider("xai-oauth", base_url) diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 8cc02629523..5652281eb42 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -581,7 +581,7 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): def _build_xai_oauth_agent(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( - model="grok-code-fast-1", + model="grok-4.3", provider="xai-oauth", api_mode="codex_responses", base_url="https://api.x.ai/v1", @@ -619,7 +619,7 @@ def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch): ] ) - assert kwargs.get("model") == "grok-code-fast-1" + assert kwargs.get("model") == "grok-4.3" # Top-level kwarg must NOT be set — that's the openai SDK # incompatibility this whole indirection exists to dodge. 
assert "prompt_cache_key" not in kwargs From aac6d97a143759731431ade9a098b4baa55fc53d Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 12:11:08 -0700 Subject: [PATCH 050/218] chore(xai-oauth): trim CORS allowlist to xAI auth origins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop accounts.mouseion.dev and localhost:20000 / 127.0.0.1:20000 from the loopback callback CORS allowlist — leftover dev origins. The redirect_uri is bound to 127.0.0.1 and gated by PKCE + state, so only xAI's own auth origins are needed. Co-Authored-By: Jaaneek --- hermes_cli/auth.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 8749cd9461c..c6dce709384 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2081,12 +2081,12 @@ def _xai_validate_loopback_redirect_uri(redirect_uri: str) -> tuple[str, int, st def _xai_callback_cors_origin(origin: Optional[str]) -> str: + # CORS allowlist for the loopback callback. Only xAI's own auth origins + # are accepted; the redirect_uri itself is bound to 127.0.0.1 and gated by + # PKCE+state, so additional dev/3p origins are not needed here. allowed = { "https://accounts.x.ai", "https://auth.x.ai", - "https://accounts.mouseion.dev", - "http://localhost:20000", - "http://127.0.0.1:20000", } return origin if origin in allowed else "" From 4ad5fa702f6c04a2032be876a8d4d0b37a88459d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 12:33:12 -0700 Subject: [PATCH 051/218] docs(xai-oauth): add xai-oauth to provider enumeration pages (#26542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #26534 (xai-oauth provider). 
The new guide and integrations page were shipped with the salvage, but four reference/enumeration pages still listed every other OAuth provider without xai-oauth: - reference/cli-commands.md — `--provider` choices list - reference/environment-variables.md — HERMES_INFERENCE_PROVIDER values - user-guide/configuration.md — auxiliary-task provider list, OAuth tip block (mirrored from MiniMax OAuth), and provider table row - user-guide/features/fallback-providers.md — provider table --- website/docs/reference/cli-commands.md | 2 +- website/docs/reference/environment-variables.md | 2 +- website/docs/user-guide/configuration.md | 7 ++++++- website/docs/user-guide/features/fallback-providers.md | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index a895e1efa74..aa12f431b62 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -92,7 +92,7 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model ` | Override the model for this run. | | `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. | -| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). 
| +| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `xai-oauth` (alias `grok-oauth`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). | | `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 93107fba147..56fe8a13715 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -105,7 +105,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, 
`custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `xai-oauth` (browser OAuth login for SuperGrok subscribers — no API key required; see [xAI Grok OAuth guide](../guides/xai-grok-oauth.md)), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 89bdb234146..d529c8af687 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -813,12 +813,16 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL. 
-Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`). +Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`). :::tip MiniMax OAuth `minimax-oauth` logs in via browser OAuth (no API key needed). Run `hermes model` and select **MiniMax (OAuth)** to authenticate. Auxiliary tasks use `MiniMax-M2.7-highspeed` automatically. See the [MiniMax OAuth guide](../guides/minimax-oauth.md). ::: +:::tip xAI Grok OAuth +`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md). 
+::: + :::warning `"main"` is for auxiliary tasks only The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options. ::: @@ -980,6 +984,7 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`, | `"nous"` | Force Nous Portal | `hermes auth` | | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | | `"minimax-oauth"` | Force MiniMax OAuth (browser login, no API key). Uses MiniMax-M2.7-highspeed for auxiliary tasks. | `hermes model` → MiniMax (OAuth) | +| `"xai-oauth"` | Force xAI Grok OAuth (browser login for SuperGrok subscribers, no API key). Same OAuth token covers chat, TTS, image, video, and transcription. | `hermes model` → xAI Grok OAuth (SuperGrok Subscription) | | `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL | Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured: diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index cd002ae689e..72528796d55 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -66,6 +66,7 @@ Both `provider` and `model` are **required**. 
If either is missing, the fallback | Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) | | Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) | | xAI (Grok) | `xai` (alias `grok`) | `XAI_API_KEY` (optional: `XAI_BASE_URL`) | +| xAI Grok OAuth (SuperGrok) | `xai-oauth` (alias `grok-oauth`) | `hermes model` → xAI Grok OAuth (browser login; SuperGrok subscription) | | AWS Bedrock | `bedrock` | Standard boto3 auth (`AWS_REGION` + `AWS_PROFILE` or `AWS_ACCESS_KEY_ID`) | | Qwen Portal (OAuth) | `qwen-oauth` | `hermes model` (Qwen Portal OAuth; optional: `HERMES_QWEN_BASE_URL`) | | MiniMax (OAuth) | `minimax-oauth` | `hermes model` (MiniMax portal OAuth) | From 734aa0f367a5ace259e4c35d7b002b634a3149ae Mon Sep 17 00:00:00 2001 From: aydnOktay Date: Tue, 24 Mar 2026 13:50:11 +0300 Subject: [PATCH 052/218] fix(cronjob): require explicit truthy session env values --- tests/tools/test_cronjob_tools.py | 7 +++++++ tools/cronjob_tools.py | 14 +++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py index 3e1f85c370a..34c5fede560 100644 --- a/tests/tools/test_cronjob_tools.py +++ b/tests/tools/test_cronjob_tools.py @@ -122,6 +122,13 @@ class TestCronjobRequirements: assert check_cronjob_requirements() is False + @pytest.mark.parametrize("false_like_value", ["0", "false", "no", "off"]) + def test_rejects_false_like_interactive_env(self, monkeypatch, false_like_value): + monkeypatch.setenv("HERMES_INTERACTIVE", false_like_value) + monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) + monkeypatch.delenv("HERMES_EXEC_ASK", raising=False) + assert check_cronjob_requirements() is False + class TestUnifiedCronjobTool: @pytest.fixture(autouse=True) diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 3c29431484d..698aab2cfc2 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ 
-662,6 +662,14 @@ Important safety rule: cron-run sessions should not recursively schedule more cr } +def _is_truthy_env(var_name: str) -> bool: + """Return True only for explicit truthy env values.""" + value = os.getenv(var_name) + if value is None: + return False + return value.strip().lower() in {"1", "true", "yes", "on"} + + def check_cronjob_requirements() -> bool: """ Check if cronjob tools can be used. @@ -671,9 +679,9 @@ def check_cronjob_requirements() -> bool: so no external crontab executable is required. """ return bool( - os.getenv("HERMES_INTERACTIVE") - or os.getenv("HERMES_GATEWAY_SESSION") - or os.getenv("HERMES_EXEC_ASK") + _is_truthy_env("HERMES_INTERACTIVE") + or _is_truthy_env("HERMES_GATEWAY_SESSION") + or _is_truthy_env("HERMES_EXEC_ASK") ) From 931caf2b2d42d6e76b8c470e5d44ca20704c41dc Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 02:03:49 -0700 Subject: [PATCH 053/218] fix(env-flags): widen truthy-only session env checks to sibling sites Build on @aydnOktay's cronjob fix by routing the cronjob check through the shared 'env_var_enabled' helper in utils.py (same truthy set: 1/true/yes/on) and applying the same semantics to the 8 sibling call sites that read HERMES_INTERACTIVE / HERMES_GATEWAY_SESSION / HERMES_EXEC_ASK / HERMES_CRON_SESSION with bare os.getenv() truthy checks: - tools/approval.py: _is_gateway_approval_context (2), check_command_safety (2), check_all_command_guards (3) -- 7 sites total - tools/terminal_tool.py: _handle_sudo_failure, sudo password prompt -- 2 sites - tools/skills_tool.py: _is_gateway_surface -- 1 site Without this, a user who exports HERMES_INTERACTIVE=0 in their shell still gets interactive sudo prompts, approval prompts, and gateway skill-install paths -- only the cronjob tool was hardened. Now all consumers agree on the same false-like values. 
Also drops the duplicate _is_truthy_env helper from cronjob_tools.py in favour of the existing canonical utils.env_var_enabled. Tests: extend the parametrized regression coverage to all three session env vars (HERMES_INTERACTIVE / HERMES_GATEWAY_SESSION / HERMES_EXEC_ASK) symmetrically. tests/tools/test_cronjob_tools.py: 60/60 pass; tests/tools/{approval,terminal_tool,skills_tool, cron_approval_mode,hardline_blocklist}.py: 378/378 pass. --- tests/tools/test_cronjob_tools.py | 14 ++++++++++++++ tools/approval.py | 18 +++++++++--------- tools/cronjob_tools.py | 23 +++++++++++------------ tools/skills_tool.py | 3 ++- tools/terminal_tool.py | 6 ++++-- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py index 34c5fede560..6280b71d29f 100644 --- a/tests/tools/test_cronjob_tools.py +++ b/tests/tools/test_cronjob_tools.py @@ -129,6 +129,20 @@ class TestCronjobRequirements: monkeypatch.delenv("HERMES_EXEC_ASK", raising=False) assert check_cronjob_requirements() is False + @pytest.mark.parametrize( + "var_name", + ["HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_EXEC_ASK"], + ) + @pytest.mark.parametrize("false_like_value", ["0", "false", "no", "off"]) + def test_rejects_false_like_any_session_env( + self, monkeypatch, var_name, false_like_value + ): + """All three session env vars share the same truthy semantics.""" + for v in ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_EXEC_ASK"): + monkeypatch.delenv(v, raising=False) + monkeypatch.setenv(var_name, false_like_value) + assert check_cronjob_requirements() is False + class TestUnifiedCronjobTool: @pytest.fixture(autouse=True) diff --git a/tools/approval.py b/tools/approval.py index dbb3810886f..84d02cc6a98 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -19,7 +19,7 @@ import unicodedata from typing import Optional from hermes_cli.config import cfg_get -from utils import is_truthy_value +from utils import 
env_var_enabled, is_truthy_value logger = logging.getLogger(__name__) @@ -108,9 +108,9 @@ def _is_gateway_approval_context() -> bool: fall through to the gateway branch would submit a pending approval with no listener and block the job indefinitely. """ - if os.getenv("HERMES_CRON_SESSION"): + if env_var_enabled("HERMES_CRON_SESSION"): return False - if os.getenv("HERMES_GATEWAY_SESSION"): + if env_var_enabled("HERMES_GATEWAY_SESSION"): return True return bool(_get_session_platform()) @@ -928,12 +928,12 @@ def check_dangerous_command(command: str, env_type: str, if is_approved(session_key, pattern_key): return {"approved": True, "message": None} - is_cli = os.getenv("HERMES_INTERACTIVE") + is_cli = env_var_enabled("HERMES_INTERACTIVE") is_gateway = _is_gateway_approval_context() if not is_cli and not is_gateway: # Cron sessions: respect cron_mode config - if os.getenv("HERMES_CRON_SESSION"): + if env_var_enabled("HERMES_CRON_SESSION"): if _get_cron_approval_mode() == "deny": return { "approved": False, @@ -947,7 +947,7 @@ def check_dangerous_command(command: str, env_type: str, } return {"approved": True, "message": None} - if is_gateway or os.getenv("HERMES_EXEC_ASK"): + if is_gateway or env_var_enabled("HERMES_EXEC_ASK"): submit_pending(session_key, { "command": command, "pattern_key": pattern_key, @@ -1056,15 +1056,15 @@ def check_all_command_guards(command: str, env_type: str, if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled() or approval_mode == "off": return {"approved": True, "message": None} - is_cli = os.getenv("HERMES_INTERACTIVE") + is_cli = env_var_enabled("HERMES_INTERACTIVE") is_gateway = _is_gateway_approval_context() - is_ask = os.getenv("HERMES_EXEC_ASK") + is_ask = env_var_enabled("HERMES_EXEC_ASK") # Preserve the existing non-interactive behavior: outside CLI/gateway/ask # flows, we do not block on approvals and we skip external guard work. 
if not is_cli and not is_gateway and not is_ask: # Cron sessions: respect cron_mode config - if os.getenv("HERMES_CRON_SESSION"): + if env_var_enabled("HERMES_CRON_SESSION"): if _get_cron_approval_mode() == "deny": # Run detection to get a description for the block message is_dangerous, _pk, description = detect_dangerous_command(command) diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 698aab2cfc2..a7a8a0feab9 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -662,14 +662,6 @@ Important safety rule: cron-run sessions should not recursively schedule more cr } -def _is_truthy_env(var_name: str) -> bool: - """Return True only for explicit truthy env values.""" - value = os.getenv(var_name) - if value is None: - return False - return value.strip().lower() in {"1", "true", "yes", "on"} - - def check_cronjob_requirements() -> bool: """ Check if cronjob tools can be used. @@ -677,11 +669,18 @@ def check_cronjob_requirements() -> bool: Available in interactive CLI mode and gateway/messaging platforms. The cron system is internal (JSON file-based scheduler ticked by the gateway), so no external crontab executable is required. + + Session env vars must hold an explicit truthy string (``1``, ``true``, + ``yes``, ``on``) — false-like values (``0``, ``false``, ``no``, ``off``) + leave the tool disabled. Uses the shared ``env_var_enabled`` helper so + every consumer of these flags agrees on the truthy set. 
""" - return bool( - _is_truthy_env("HERMES_INTERACTIVE") - or _is_truthy_env("HERMES_GATEWAY_SESSION") - or _is_truthy_env("HERMES_EXEC_ASK") + from utils import env_var_enabled + + return ( + env_var_enabled("HERMES_INTERACTIVE") + or env_var_enabled("HERMES_GATEWAY_SESSION") + or env_var_enabled("HERMES_EXEC_ASK") ) diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 0fcd449b80b..df6361ba59a 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -78,6 +78,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple from tools.registry import registry, tool_error from hermes_cli.config import cfg_get +from utils import env_var_enabled logger = logging.getLogger(__name__) @@ -365,7 +366,7 @@ def _capture_required_environment_variables( def _is_gateway_surface() -> bool: - if os.getenv("HERMES_GATEWAY_SESSION"): + if env_var_enabled("HERMES_GATEWAY_SESSION"): return True from gateway.session_context import get_session_env return bool(get_session_env("HERMES_SESSION_PLATFORM")) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index e0d07e80f6e..31a1c6fa078 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -47,6 +47,8 @@ import subprocess from pathlib import Path from typing import Optional, Dict, Any, List +from utils import env_var_enabled + logger = logging.getLogger(__name__) @@ -360,7 +362,7 @@ def _handle_sudo_failure(output: str, env_type: str) -> str: Returns enhanced output if sudo failed in messaging context, else original. 
""" - is_gateway = os.getenv("HERMES_GATEWAY_SESSION") + is_gateway = env_var_enabled("HERMES_GATEWAY_SESSION") if not is_gateway: return output @@ -868,7 +870,7 @@ def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None if not has_configured_password and not sudo_password and _sudo_nopasswd_works(): return command, None - if not has_configured_password and not sudo_password and os.getenv("HERMES_INTERACTIVE"): + if not has_configured_password and not sudo_password and env_var_enabled("HERMES_INTERACTIVE"): sudo_password = _prompt_for_sudo_password(timeout_seconds=45) if sudo_password: _set_cached_sudo_password(sudo_password) From 4e89c53082b13b71d0c7f2f662cd65ea80d9f17c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:00:01 -0700 Subject: [PATCH 054/218] fix(async): close unscheduled coroutines in all threadsafe bridges (#26584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps every sync->async coroutine-scheduling site in the codebase with a new agent.async_utils.safe_schedule_threadsafe() helper that closes the coroutine on scheduling failure (closed loop, shutdown race, etc.) instead of leaking it as 'coroutine was never awaited' RuntimeWarnings plus reference leaks. 
22 production call sites migrated across the codebase: - acp_adapter/events.py, acp_adapter/permissions.py - agent/lsp/manager.py - cron/scheduler.py (media + text delivery paths) - gateway/platforms/feishu.py (5 sites, via existing _submit_on_loop helper which now delegates to safe_schedule_threadsafe) - gateway/run.py (10 sites: telegram rename, agent:step hook, status callback, interim+bg-review, clarify send, exec-approval button+text, temp-bubble cleanup, channel-directory refresh) - plugins/memory/hindsight, plugins/platforms/google_chat - tools/browser_supervisor.py (3), browser_cdp_tool.py, computer_use/cua_backend.py, slash_confirm.py - tools/environments/modal.py (_AsyncWorker) - tools/mcp_tool.py (2 + 8 _run_on_mcp_loop callers converted to factory-style so the coroutine is never constructed on a dead loop) - tui_gateway/ws.py Tests: new tests/agent/test_async_utils.py covers helper behavior under live loop, dead loop, None loop, and scheduling exceptions. Regression tests added at three PR-original sites (acp events, acp permissions, mcp loop runner) mirroring contributor's intent. Live-tested end-to-end: - Helper stress test: 1500 schedules across live/dead/race scenarios, zero leaked coroutines - Race exercised: 5000 schedules with loop killed mid-flight, 100 ok / 4900 None returns, zero leaks - hermes chat -q with terminal tool call (exercises step_callback bridge) - MCP probe against failing subprocess servers + factory path - Real gateway daemon boot + SIGINT shutdown across multiple platform adapter inits - WSTransport 100 live + 50 dead-loop writes - Cron delivery path live + dead loop Salvages PR #2657 — adopts contributor's intent over a much wider site list and a single centralized helper instead of inline try/except at each site. 3 of the original PR's 6 sites no longer exist on main (environments/patches.py deleted, DingTalk refactored to native async); the equivalent fix lives in tools/environments/modal.py instead. 
Co-authored-by: JithendraNara --- acp_adapter/events.py | 13 +- acp_adapter/permissions.py | 27 ++- agent/async_utils.py | 68 +++++++ agent/lsp/manager.py | 7 +- cron/scheduler.py | 39 ++-- gateway/platforms/feishu.py | 44 ++--- gateway/run.py | 206 ++++++++++++--------- plugins/memory/hindsight/__init__.py | 5 +- plugins/platforms/google_chat/adapter.py | 10 +- scripts/release.py | 1 + tests/acp/test_events.py | 46 +++++ tests/acp/test_permissions.py | 49 ++++- tests/agent/test_async_utils.py | 157 ++++++++++++++++ tests/tools/test_mcp_probe.py | 12 +- tests/tools/test_mcp_structured_content.py | 3 +- tests/tools/test_mcp_tool.py | 80 +++++++- tools/browser_cdp_tool.py | 8 +- tools/browser_supervisor.py | 22 ++- tools/computer_use/cua_backend.py | 7 +- tools/environments/modal.py | 7 +- tools/mcp_tool.py | 52 ++++-- tools/slash_confirm.py | 7 +- tui_gateway/ws.py | 6 +- 23 files changed, 690 insertions(+), 186 deletions(-) create mode 100644 agent/async_utils.py create mode 100644 tests/agent/test_async_utils.py diff --git a/acp_adapter/events.py b/acp_adapter/events.py index 1257f902ebb..f0442ca2e8f 100644 --- a/acp_adapter/events.py +++ b/acp_adapter/events.py @@ -31,10 +31,17 @@ def _send_update( update: Any, ) -> None: """Fire-and-forget an ACP session update from a worker thread.""" + from agent.async_utils import safe_schedule_threadsafe + + future = safe_schedule_threadsafe( + conn.session_update(session_id, update), + loop, + logger=logger, + log_message="Failed to send ACP update", + ) + if future is None: + return try: - future = asyncio.run_coroutine_threadsafe( - conn.session_update(session_id, update), loop - ) future.result(timeout=5) except Exception: logger.debug("Failed to send ACP update", exc_info=True) diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py index 44aead28742..76474e55dac 100644 --- a/acp_adapter/permissions.py +++ b/acp_adapter/permissions.py @@ -111,21 +111,28 @@ def make_approval_callback( allow_permanent: bool = 
True, **_: object, ) -> str: + from agent.async_utils import safe_schedule_threadsafe + options = _build_permission_options(allow_permanent=allow_permanent) - future = None + tool_call = _build_permission_tool_call(command, description) + coro = request_permission_fn( + session_id=session_id, + tool_call=tool_call, + options=options, + ) + future = safe_schedule_threadsafe( + coro, loop, + logger=logger, + log_message="Permission request: failed to schedule on loop", + ) + if future is None: + return "deny" + try: - tool_call = _build_permission_tool_call(command, description) - coro = request_permission_fn( - session_id=session_id, - tool_call=tool_call, - options=options, - ) - future = asyncio.run_coroutine_threadsafe(coro, loop) response = future.result(timeout=timeout) except (FutureTimeout, Exception) as exc: - if future is not None: - future.cancel() + future.cancel() logger.warning("Permission request timed out or failed: %s", exc) return "deny" diff --git a/agent/async_utils.py b/agent/async_utils.py new file mode 100644 index 00000000000..d268e1a3a84 --- /dev/null +++ b/agent/async_utils.py @@ -0,0 +1,68 @@ +"""Async/sync bridging helpers. + +The codebase has ~30 sites that schedule a coroutine onto an event loop from a +worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can +raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race), +and when it does the coroutine object is never awaited and never closed — +which triggers a ``"coroutine '...' was never awaited"`` RuntimeWarning and +leaks the coroutine's frame until GC. 
+ +:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on +scheduling failure, and returns ``None`` (instead of a half-formed future) so +callers can branch cleanly: + + fut = safe_schedule_threadsafe(coro, loop) + if fut is None: + return # or fallback behavior + fut.result(timeout=5) + +The helper deliberately does NOT also handle ``future.result()`` failures — +that is a separate concern. Once the loop has accepted the coroutine, its +lifecycle belongs to the loop, not the scheduling thread. +""" +from __future__ import annotations + +import asyncio +import logging +from concurrent.futures import Future +from typing import Any, Coroutine, Optional + + +_DEFAULT_LOGGER = logging.getLogger(__name__) + + +def safe_schedule_threadsafe( + coro: Coroutine[Any, Any, Any], + loop: Optional[asyncio.AbstractEventLoop], + *, + logger: Optional[logging.Logger] = None, + log_message: str = "Failed to schedule coroutine on loop", + log_level: int = logging.DEBUG, +) -> Optional[Future]: + """Schedule ``coro`` on ``loop`` from a sync context, leak-safe. + + Returns the :class:`concurrent.futures.Future` on success, or ``None`` if + the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised + (e.g. the loop was closed during a shutdown race). In all failure paths + the coroutine is :meth:`close`-d so it does not trigger + ``"coroutine was never awaited"`` warnings or leak its frame. + + Callers retain full control over what to do with the returned future + (call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it + fire-and-forget, etc.). 
+ """ + log = logger if logger is not None else _DEFAULT_LOGGER + + if loop is None: + if asyncio.iscoroutine(coro): + coro.close() + log.log(log_level, "%s: loop is None", log_message) + return None + + try: + return asyncio.run_coroutine_threadsafe(coro, loop) + except Exception as exc: + if asyncio.iscoroutine(coro): + coro.close() + log.log(log_level, "%s: %s", log_message, exc) + return None diff --git a/agent/lsp/manager.py b/agent/lsp/manager.py index 34c0b0ba92b..7f5feaa170f 100644 --- a/agent/lsp/manager.py +++ b/agent/lsp/manager.py @@ -107,9 +107,14 @@ class _BackgroundLoop: Returns the coroutine's result, or raises its exception. """ + from agent.async_utils import safe_schedule_threadsafe if self._loop is None: + if asyncio.iscoroutine(coro): + coro.close() raise RuntimeError("background loop not started") - fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop) + fut = safe_schedule_threadsafe(coro, self._loop) + if fut is None: + raise RuntimeError("background loop not running") try: return fut.result(timeout=timeout) except Exception: diff --git a/cron/scheduler.py b/cron/scheduler.py index b585ef2e42b..d470e8c2c74 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -464,7 +464,14 @@ def _send_media_via_adapter( else: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) - future = asyncio.run_coroutine_threadsafe(coro, loop) + from agent.async_utils import safe_schedule_threadsafe + future = safe_schedule_threadsafe(coro, loop) + if future is None: + logger.warning( + "Job '%s': cannot send media %s, gateway loop unavailable", + job.get("id", "?"), media_path, + ) + return try: result = future.result(timeout=30) except TimeoutError: @@ -585,22 +592,26 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option text_to_send = cleaned_delivery_content.strip() adapter_ok = True if text_to_send: - future = asyncio.run_coroutine_threadsafe( + from agent.async_utils import 
safe_schedule_threadsafe + future = safe_schedule_threadsafe( runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - try: - send_result = future.result(timeout=60) - except TimeoutError: - future.cancel() - raise - if send_result and not getattr(send_result, "success", True): - err = getattr(send_result, "error", "unknown") - logger.warning( - "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", - job["id"], platform_name, chat_id, err, - ) - adapter_ok = False # fall through to standalone path + if future is None: + adapter_ok = False + else: + try: + send_result = future.result(timeout=60) + except TimeoutError: + future.cancel() + raise + if send_result and not getattr(send_result, "success", True): + err = getattr(send_result, "error", "unknown") + logger.warning( + "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", + job["id"], platform_name, chat_id, err, + ) + adapter_ok = False # fall through to standalone path # Send extracted media files as native attachments via the live adapter if adapter_ok and media_files: diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 8d60046d35d..a9b0447080d 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -2273,11 +2273,7 @@ class FeishuAdapter(BasePlatformAdapter): daemon=True, ).start() return - future = asyncio.run_coroutine_threadsafe( - self._handle_message_event_data(data), - loop, - ) - future.add_done_callback(self._log_background_failure) + self._submit_on_loop(loop, self._handle_message_event_data(data)) def _enqueue_pending_inbound_event(self, data: Any) -> bool: """Append an event to the pending-inbound queue. 
@@ -2353,16 +2349,12 @@ class FeishuAdapter(BasePlatformAdapter): dispatched = 0 requeue: List[Any] = [] for event in batch: - try: - fut = asyncio.run_coroutine_threadsafe( - self._handle_message_event_data(event), - loop, - ) - fut.add_done_callback(self._log_background_failure) + if self._submit_on_loop( + loop, self._handle_message_event_data(event) + ): dispatched += 1 - except RuntimeError: - # Loop closed between check and submit — requeue - # and poll again. + else: + # Loop closed/unavailable — requeue and poll again. requeue.append(event) if requeue: with self._pending_inbound_lock: @@ -2466,11 +2458,10 @@ class FeishuAdapter(BasePlatformAdapter): if not self._loop_accepts_callbacks(loop): logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready") return - future = asyncio.run_coroutine_threadsafe( - handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id), + self._submit_on_loop( loop, + handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id), ) - future.add_done_callback(self._log_background_failure) def _on_reaction_event(self, event_type: str, data: Any) -> None: """Route user reactions on bot messages as synthetic text events.""" @@ -2498,11 +2489,7 @@ class FeishuAdapter(BasePlatformAdapter): or bool(getattr(loop, "is_closed", lambda: False)()) ): return - future = asyncio.run_coroutine_threadsafe( - self._handle_reaction_event(event_type, data), - loop, - ) - future.add_done_callback(self._log_background_failure) + self._submit_on_loop(loop, self._handle_reaction_event(event_type, data)) def _on_card_action_trigger(self, data: Any) -> Any: """Handle card-action callback from the Feishu SDK (synchronous). 
@@ -2548,11 +2535,14 @@ class FeishuAdapter(BasePlatformAdapter): def _submit_on_loop(self, loop: Any, coro: Any) -> bool: """Schedule background work on the adapter loop with shared failure logging.""" - try: - future = asyncio.run_coroutine_threadsafe(coro, loop) - except Exception: - coro.close() - logger.warning("[Feishu] Failed to schedule background callback work", exc_info=True) + from agent.async_utils import safe_schedule_threadsafe + future = safe_schedule_threadsafe( + coro, loop, + logger=logger, + log_message="[Feishu] Failed to schedule background callback work", + log_level=logging.WARNING, + ) + if future is None: return False future.add_done_callback(self._log_background_failure) return True diff --git a/gateway/run.py b/gateway/run.py index 5e8fce8e18d..f41357673f7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -50,6 +50,7 @@ from typing import Dict, Optional, Any, List, Union # gateway is a long-running daemon, so its boot cost matters less than # preserving the established test-patch surface. from agent.account_usage import fetch_account_usage, render_account_usage_lines +from agent.async_utils import safe_schedule_threadsafe from agent.i18n import t from hermes_cli.config import cfg_get @@ -11217,10 +11218,14 @@ class GatewayRunner: copied_source = dataclasses.replace(source) except Exception: copied_source = source - future = asyncio.run_coroutine_threadsafe( + future = safe_schedule_threadsafe( self._rename_telegram_topic_for_session_title(copied_source, session_id, title), loop, + logger=logger, + log_message="Telegram topic title rename failed to schedule", ) + if future is None: + return def _log_rename_failure(fut) -> None: try: fut.result() @@ -14810,29 +14815,28 @@ class GatewayRunner: def _step_callback_sync(iteration: int, prev_tools: list) -> None: if not _run_still_current(): return - try: - # prev_tools may be list[str] or list[dict] with "name"/"result" - # keys. 
Normalise to keep "tool_names" backward-compatible for - # user-authored hooks that do ', '.join(tool_names)'. - _names: list[str] = [] - for _t in (prev_tools or []): - if isinstance(_t, dict): - _names.append(_t.get("name") or "") - else: - _names.append(str(_t)) - asyncio.run_coroutine_threadsafe( - _hooks_ref.emit("agent:step", { - "platform": source.platform.value if source.platform else "", - "user_id": source.user_id, - "session_id": session_id, - "iteration": iteration, - "tool_names": _names, - "tools": prev_tools, - }), - _loop_for_step, - ) - except Exception as _e: - logger.debug("agent:step hook error: %s", _e) + # prev_tools may be list[str] or list[dict] with "name"/"result" + # keys. Normalise to keep "tool_names" backward-compatible for + # user-authored hooks that do ', '.join(tool_names)'. + _names: list[str] = [] + for _t in (prev_tools or []): + if isinstance(_t, dict): + _names.append(_t.get("name") or "") + else: + _names.append(str(_t)) + safe_schedule_threadsafe( + _hooks_ref.emit("agent:step", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_id, + "iteration": iteration, + "tool_names": _names, + "tools": prev_tools, + }), + _loop_for_step, + logger=logger, + log_message="agent:step hook scheduling error", + ) # Bridge sync status_callback → async adapter.send for context pressure _status_adapter = self.adapters.get(source.platform) @@ -14852,27 +14856,28 @@ class GatewayRunner: def _status_callback_sync(event_type: str, message: str) -> None: if not _status_adapter or not _run_still_current(): return - try: - _fut = asyncio.run_coroutine_threadsafe( - _status_adapter.send( - _status_chat_id, - message, - metadata=_status_thread_metadata, - ), - _loop_for_step, - ) - if _cleanup_progress: - def _track_status_id(fut) -> None: - try: - res = fut.result() - except Exception: - return - mid = getattr(res, "message_id", None) - if getattr(res, "success", False) and mid: - 
_cleanup_msg_ids.append(str(mid)) - _fut.add_done_callback(_track_status_id) - except Exception as _e: - logger.debug("status_callback error (%s): %s", event_type, _e) + _fut = safe_schedule_threadsafe( + _status_adapter.send( + _status_chat_id, + message, + metadata=_status_thread_metadata, + ), + _loop_for_step, + logger=logger, + log_message=f"status_callback ({event_type}) scheduling error", + ) + if _fut is None: + return + if _cleanup_progress: + def _track_status_id(fut) -> None: + try: + res = fut.result() + except Exception: + return + mid = getattr(res, "message_id", None) + if getattr(res, "success", False) and mid: + _cleanup_msg_ids.append(str(mid)) + _fut.add_done_callback(_track_status_id) def run_sync(): # The conditional re-assignment of `message` further below @@ -15026,17 +15031,16 @@ class GatewayRunner: return if already_streamed or not _status_adapter or not str(text or "").strip(): return - try: - asyncio.run_coroutine_threadsafe( - _status_adapter.send( - _status_chat_id, - text, - metadata=_status_thread_metadata, - ), - _loop_for_step, - ) - except Exception as _e: - logger.debug("interim_assistant_callback error: %s", _e) + safe_schedule_threadsafe( + _status_adapter.send( + _status_chat_id, + text, + metadata=_status_thread_metadata, + ), + _loop_for_step, + logger=logger, + log_message="interim_assistant_callback scheduling error", + ) turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs) @@ -15125,17 +15129,16 @@ class GatewayRunner: def _deliver_bg_review_message(message: str) -> None: if not _status_adapter or not _run_still_current(): return - try: - asyncio.run_coroutine_threadsafe( - _status_adapter.send( - _status_chat_id, - message, - metadata=_status_thread_metadata, - ), - _loop_for_step, - ) - except Exception as _e: - logger.debug("background_review_callback error: %s", _e) + safe_schedule_threadsafe( + _status_adapter.send( + _status_chat_id, + message, + metadata=_status_thread_metadata, + ), + 
_loop_for_step, + logger=logger, + log_message="background_review_callback scheduling error", + ) def _release_bg_review_messages() -> None: _bg_review_release.set() @@ -15207,23 +15210,28 @@ class GatewayRunner: pass send_ok = False - try: - fut = asyncio.run_coroutine_threadsafe( - _status_adapter.send_clarify( - chat_id=_status_chat_id, - question=question, - choices=list(choices) if choices else None, - clarify_id=clarify_id, - session_key=session_key or "", - metadata=_status_thread_metadata, - ), - _loop_for_step, - ) - result = fut.result(timeout=15) - send_ok = bool(getattr(result, "success", False)) - except Exception as exc: - logger.warning("Clarify send failed: %s", exc) + fut = safe_schedule_threadsafe( + _status_adapter.send_clarify( + chat_id=_status_chat_id, + question=question, + choices=list(choices) if choices else None, + clarify_id=clarify_id, + session_key=session_key or "", + metadata=_status_thread_metadata, + ), + _loop_for_step, + logger=logger, + log_message="Clarify send failed to schedule", + ) + if fut is None: send_ok = False + else: + try: + result = fut.result(timeout=15) + send_ok = bool(getattr(result, "success", False)) + except Exception as exc: + logger.warning("Clarify send failed: %s", exc) + send_ok = False if not send_ok: # Couldn't deliver the prompt — clean up and return @@ -15343,7 +15351,7 @@ class GatewayRunner: # false positives from MagicMock auto-attribute creation in tests. 
if getattr(type(_status_adapter), "send_exec_approval", None) is not None: try: - _approval_result = asyncio.run_coroutine_threadsafe( + _approval_fut = safe_schedule_threadsafe( _status_adapter.send_exec_approval( chat_id=_status_chat_id, command=cmd, @@ -15352,7 +15360,12 @@ class GatewayRunner: metadata=_status_thread_metadata, ), _loop_for_step, - ).result(timeout=15) + logger=logger, + log_message="send_exec_approval scheduling error", + ) + if _approval_fut is None: + raise RuntimeError("send_exec_approval: loop unavailable") + _approval_result = _approval_fut.result(timeout=15) if _approval_result.success: return logger.warning( @@ -15374,14 +15387,18 @@ class GatewayRunner: f"for the session, `/approve always` to approve permanently, or `/deny` to cancel." ) try: - asyncio.run_coroutine_threadsafe( + _approval_send_fut = safe_schedule_threadsafe( _status_adapter.send( _status_chat_id, msg, metadata=_status_thread_metadata, ), _loop_for_step, - ).result(timeout=15) + logger=logger, + log_message="Approval text-send scheduling error", + ) + if _approval_send_fut is not None: + _approval_send_fut.result(timeout=15) except Exception as _e: logger.error("Failed to send approval request: %s", _e) @@ -16343,7 +16360,11 @@ class GatewayRunner: except Exception: pass try: - asyncio.run_coroutine_threadsafe(_delete_all(), _loop_snapshot) + safe_schedule_threadsafe( + _delete_all(), _loop_snapshot, + logger=logger, + log_message="Temp bubble cleanup scheduling error", + ) except Exception: pass @@ -16400,10 +16421,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in # this ticker runs in a background thread. Schedule onto # the gateway event loop and wait briefly for completion # so refresh failures are still logged via the except. 
- fut = asyncio.run_coroutine_threadsafe( - build_channel_directory(adapters), loop + fut = safe_schedule_threadsafe( + build_channel_directory(adapters), loop, + logger=logger, + log_message="Channel directory refresh scheduling error", ) - fut.result(timeout=30) + if fut is not None: + fut.result(timeout=30) except Exception as e: logger.debug("Channel directory refresh error: %s", e) diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index 3a42a320453..52b1ac247f1 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -221,8 +221,11 @@ def _get_loop() -> asyncio.AbstractEventLoop: def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT): """Schedule *coro* on the shared loop and block until done.""" + from agent.async_utils import safe_schedule_threadsafe loop = _get_loop() - future = asyncio.run_coroutine_threadsafe(coro, loop) + future = safe_schedule_threadsafe(coro, loop) + if future is None: + raise RuntimeError("Hindsight loop unavailable") return future.result(timeout=timeout) diff --git a/plugins/platforms/google_chat/adapter.py b/plugins/platforms/google_chat/adapter.py index 1d58e801f46..d8777bf7101 100644 --- a/plugins/platforms/google_chat/adapter.py +++ b/plugins/platforms/google_chat/adapter.py @@ -670,10 +670,18 @@ class GoogleChatAdapter(BasePlatformAdapter): logger.warning("[GoogleChat] Loop not accepting callbacks; dropping event") return try: - future = asyncio.run_coroutine_threadsafe(coro, loop) + from agent.async_utils import safe_schedule_threadsafe + future = safe_schedule_threadsafe( + coro, loop, + logger=logger, + log_message="[GoogleChat] Failed to schedule background callback", + log_level=logging.WARNING, + ) except RuntimeError: logger.warning("[GoogleChat] Loop closed between check and submit") return + if future is None: + return future.add_done_callback(self._log_background_failure) # ------------------------------------------------------------------ 
diff --git a/scripts/release.py b/scripts/release.py index 740b79091b1..c9cd9c173c0 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -62,6 +62,7 @@ AUTHOR_MAP = { "nidhi2894@gmail.com": "nidhi-singh02", "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel", "oleksii.lisikh@gmail.com": "olisikh", + "jithendranaidunara@gmail.com": "JithendraNara", "jeremy@geocaching.com": "outdoorsea", "leone.parise@gmail.com": "leoneparise", "mr@shu.io": "mrshu", diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py index c9f91a181ed..56a2687226c 100644 --- a/tests/acp/test_events.py +++ b/tests/acp/test_events.py @@ -1,6 +1,8 @@ """Tests for acp_adapter.events — callback factories for ACP notifications.""" import asyncio +import gc +import warnings from concurrent.futures import Future from unittest.mock import AsyncMock, MagicMock, patch @@ -10,6 +12,7 @@ import acp from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk from acp_adapter.events import ( + _send_update, make_message_cb, make_step_cb, make_thinking_cb, @@ -325,3 +328,46 @@ class TestMessageCallback: cb("") mock_rcts.assert_not_called() + + +# --------------------------------------------------------------------------- +# Scheduler-failure regression +# --------------------------------------------------------------------------- + +class TestSendUpdate: + def test_scheduler_failure_closes_update_coroutine(self, event_loop_fixture): + """If run_coroutine_threadsafe raises, _send_update must close the coro.""" + created = {"coro": None} + + async def _session_update(session_id, update): + return None + + conn = MagicMock() + + def _capture_update(session_id, update): + created["coro"] = _session_update(session_id, update) + return created["coro"] + + conn.session_update = _capture_update + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with patch( + "agent.async_utils.asyncio.run_coroutine_threadsafe", + 
side_effect=RuntimeError("scheduler down"), + ): + _send_update(conn, "session-1", event_loop_fixture, {"type": "noop"}) + gc.collect() + + assert created["coro"] is not None + assert created["coro"].cr_frame is None + # Only count warnings about THIS test's coroutine; other tests in the + # same xdist worker (or stdlib mock internals) may emit unrelated + # "coroutine was never awaited" warnings that bleed through. + runtime_warnings = [ + w for w in caught + if issubclass(w.category, RuntimeWarning) + and "was never awaited" in str(w.message) + and "_session_update" in str(w.message) + ] + assert runtime_warnings == [] diff --git a/tests/acp/test_permissions.py b/tests/acp/test_permissions.py index 8bbdeeb392a..b4c121829dc 100644 --- a/tests/acp/test_permissions.py +++ b/tests/acp/test_permissions.py @@ -38,7 +38,7 @@ def _invoke_callback( scheduled["loop"] = passed_loop return future - with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule): + with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule): cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=timeout) if use_prompt_path: result = prompt_dangerous_approval( @@ -135,7 +135,7 @@ class TestApprovalBridge: scheduled["loop"] = passed_loop return future - with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule): + with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule): cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=0.01) result = cb("rm -rf /", "dangerous command") @@ -159,10 +159,53 @@ class TestApprovalBridge: scheduled["loop"] = passed_loop return future - with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule): + with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule): cb = make_approval_callback(request_permission, loop, session_id="s1", 
timeout=1.0) result = cb("echo hi", "demo") scheduled["coro"].close() assert result == "deny" + + +# --------------------------------------------------------------------------- +# Scheduler-failure regression +# --------------------------------------------------------------------------- + +import gc # noqa: E402 +import warnings # noqa: E402 + + +class TestSchedulerFailure: + def test_scheduler_failure_closes_permission_coroutine(self): + """If run_coroutine_threadsafe raises, the coro is closed and we return 'deny'.""" + loop = MagicMock(spec=asyncio.AbstractEventLoop) + created = {"coro": None} + + async def _response_coro(**kwargs): + return _make_response(AllowedOutcome(option_id="allow_once", outcome="selected")) + + def _request_permission(**kwargs): + created["coro"] = _response_coro(**kwargs) + return created["coro"] + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with patch( + "agent.async_utils.asyncio.run_coroutine_threadsafe", + side_effect=RuntimeError("scheduler down"), + ): + cb = make_approval_callback(_request_permission, loop, session_id="s1", timeout=0.01) + result = cb("rm -rf /", "dangerous") + gc.collect() + + assert result == "deny" + assert created["coro"] is not None + assert created["coro"].cr_frame is None + runtime_warnings = [ + w for w in caught + if issubclass(w.category, RuntimeWarning) + and "was never awaited" in str(w.message) + and "_response_coro" in str(w.message) + ] + assert runtime_warnings == [] diff --git a/tests/agent/test_async_utils.py b/tests/agent/test_async_utils.py new file mode 100644 index 00000000000..33ce84ee0c6 --- /dev/null +++ b/tests/agent/test_async_utils.py @@ -0,0 +1,157 @@ +"""Tests for agent.async_utils.safe_schedule_threadsafe.""" + +from __future__ import annotations + +import asyncio +import gc +import warnings +from concurrent.futures import Future +from unittest.mock import patch + +import pytest + +from agent.async_utils import safe_schedule_threadsafe 
+ + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _no_unawaited_warnings(caught, *, coro_name: str = "") -> bool: + """Return True if no "X was never awaited" warning slipped through. + + When *coro_name* is provided, only warnings naming that coroutine are + counted — xdist workers may emit unrelated unawaited-coroutine warnings + (e.g. ``AsyncMockMixin._execute_mock_call``) from concurrent tests. + """ + bad = [ + w for w in caught + if issubclass(w.category, RuntimeWarning) + and "was never awaited" in str(w.message) + and (not coro_name or coro_name in str(w.message)) + ] + return not bad + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestSafeScheduleThreadsafe: + def test_returns_future_on_success(self): + loop = asyncio.new_event_loop() + try: + import threading + ready = threading.Event() + stop = threading.Event() + + def _runner(): + asyncio.set_event_loop(loop) + ready.set() + loop.run_until_complete(_wait_for_stop(stop)) + + async def _wait_for_stop(ev): + while not ev.is_set(): + await asyncio.sleep(0.005) + + t = threading.Thread(target=_runner, daemon=True) + t.start() + ready.wait(timeout=2) + + async def _sample(): + return 42 + + fut = safe_schedule_threadsafe(_sample(), loop) + assert isinstance(fut, Future) + assert fut.result(timeout=2) == 42 + + stop.set() + t.join(timeout=2) + finally: + if loop.is_running(): + loop.call_soon_threadsafe(loop.stop) + loop.close() + + def test_closed_loop_returns_none_and_closes_coroutine(self): + loop = asyncio.new_event_loop() + loop.close() + + async def _sample(): + return "ok" + + coro = _sample() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = safe_schedule_threadsafe(coro, loop) + del coro + 
gc.collect() + + assert result is None + assert _no_unawaited_warnings(caught, coro_name='_sample') + + def test_none_loop_returns_none_and_closes_coroutine(self): + async def _sample(): + return "ok" + + coro = _sample() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + result = safe_schedule_threadsafe(coro, None) + del coro + gc.collect() + + assert result is None + assert _no_unawaited_warnings(caught, coro_name='_sample') + + def test_scheduling_exception_closes_coroutine(self): + """If run_coroutine_threadsafe raises, close the coroutine and return None.""" + # A loop that *looks* open but raises on submission + loop = asyncio.new_event_loop() + try: + async def _sample(): + return "ok" + + coro = _sample() + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with patch( + "agent.async_utils.asyncio.run_coroutine_threadsafe", + side_effect=RuntimeError("scheduler down"), + ): + result = safe_schedule_threadsafe(coro, loop) + del coro + gc.collect() + + assert result is None + assert _no_unawaited_warnings(caught, coro_name='_sample') + finally: + loop.close() + + def test_logs_at_specified_level(self, caplog): + import logging + loop = asyncio.new_event_loop() + loop.close() + + async def _sample(): + return None + + custom = logging.getLogger("test_async_utils") + with caplog.at_level(logging.WARNING, logger="test_async_utils"): + result = safe_schedule_threadsafe( + _sample(), loop, + logger=custom, + log_message="custom-msg", + log_level=logging.WARNING, + ) + + assert result is None + assert any("custom-msg" in rec.message for rec in caplog.records) + + def test_non_coroutine_arg_does_not_crash(self): + """Defensive: even if the caller hands us something weird, don't blow up.""" + loop = asyncio.new_event_loop() + loop.close() + + # Pass a non-coroutine sentinel + result = safe_schedule_threadsafe("not-a-coroutine", loop) # type: ignore[arg-type] + assert result is None diff 
--git a/tests/tools/test_mcp_probe.py b/tests/tools/test_mcp_probe.py index 46459e44c87..89d4d1478d1 100644 --- a/tests/tools/test_mcp_probe.py +++ b/tests/tools/test_mcp_probe.py @@ -69,7 +69,8 @@ class TestProbeMcpServerTools: patch("tools.mcp_tool._stop_mcp_loop"): # Simulate running the async probe - def run_coro(coro, timeout=120): + def run_coro(coro_or_factory, timeout=120): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) @@ -110,7 +111,8 @@ class TestProbeMcpServerTools: patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._stop_mcp_loop"): - def run_coro(coro, timeout=120): + def run_coro(coro_or_factory, timeout=120): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) @@ -144,7 +146,8 @@ class TestProbeMcpServerTools: patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._stop_mcp_loop"): - def run_coro(coro, timeout=120): + def run_coro(coro_or_factory, timeout=120): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) @@ -198,7 +201,8 @@ class TestProbeMcpServerTools: patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \ patch("tools.mcp_tool._stop_mcp_loop"): - def run_coro(coro, timeout=120): + def run_coro(coro_or_factory, timeout=120): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) diff --git a/tests/tools/test_mcp_structured_content.py b/tests/tools/test_mcp_structured_content.py index 2870ce1e860..f4cda00f9f0 100644 --- a/tests/tools/test_mcp_structured_content.py +++ b/tests/tools/test_mcp_structured_content.py @@ -31,7 +31,8 @@ class _FakeCallToolResult: self.structuredContent = structuredContent 
-def _fake_run_on_mcp_loop(coro, timeout=30): +def _fake_run_on_mcp_loop(coro_or_factory, timeout=30): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory """Run an MCP coroutine directly in a fresh event loop.""" loop = asyncio.new_event_loop() try: diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index 5558a0df48c..7f6c3f6704c 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -397,6 +397,77 @@ class TestCheckFunction: _servers.pop("test_server", None) +# --------------------------------------------------------------------------- +# MCP loop runner +# --------------------------------------------------------------------------- + +class TestRunOnMcpLoop: + def test_scheduler_failure_closes_factory_coroutine(self): + """If run_coroutine_threadsafe raises, the factory's coroutine is closed.""" + import gc + import warnings + import tools.mcp_tool as mcp + + created = {"coro": None} + + async def _sample(): + return "ok" + + def factory(): + created["coro"] = _sample() + return created["coro"] + + fake_loop = MagicMock() + fake_loop.is_running.return_value = True + + with patch.object(mcp, "_mcp_loop", fake_loop): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with patch( + "agent.async_utils.asyncio.run_coroutine_threadsafe", + side_effect=RuntimeError("scheduler down"), + ): + with pytest.raises(RuntimeError): + mcp._run_on_mcp_loop(factory) + gc.collect() + + assert created["coro"] is not None + assert created["coro"].cr_frame is None + runtime_warnings = [ + w for w in caught + if issubclass(w.category, RuntimeWarning) + and "was never awaited" in str(w.message) + and "_sample" in str(w.message) + ] + assert runtime_warnings == [] + + def test_dead_loop_closes_passed_coroutine(self): + """If loop is None, a passed coroutine (not factory) is closed.""" + import gc + import warnings + import tools.mcp_tool as mcp + + async def _sample(): + return 
"ok" + + coro = _sample() + with patch.object(mcp, "_mcp_loop", None): + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + with pytest.raises(RuntimeError, match="not running"): + mcp._run_on_mcp_loop(coro) + gc.collect() + + assert coro.cr_frame is None + runtime_warnings = [ + w for w in caught + if issubclass(w.category, RuntimeWarning) + and "was never awaited" in str(w.message) + and "_sample" in str(w.message) + ] + assert runtime_warnings == [] + + # --------------------------------------------------------------------------- # Tool handler # --------------------------------------------------------------------------- @@ -406,7 +477,8 @@ class TestToolHandler: def _patch_mcp_loop(self, coro_side_effect=None): """Return a patch for _run_on_mcp_loop that runs the coroutine directly.""" - def fake_run(coro, timeout=30): + def fake_run(coro_or_factory, timeout=30): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory return asyncio.run(coro) if coro_side_effect: return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=coro_side_effect) @@ -485,7 +557,8 @@ class TestToolHandler: try: handler = _make_tool_handler("test_srv", "greet", 120) - def _interrupting_run(coro, timeout=30): + def _interrupting_run(coro_or_factory, timeout=30): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory coro.close() raise InterruptedError("User sent a new message") with patch( @@ -1792,7 +1865,8 @@ class TestUtilityHandlers: def _patch_mcp_loop(self): """Return a patch for _run_on_mcp_loop that runs the coroutine directly.""" - def fake_run(coro, timeout=30): + def fake_run(coro_or_factory, timeout=30): + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory return asyncio.run(coro) return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run) diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py index 8e829556a57..f10a1541923 100644 --- 
a/tools/browser_cdp_tool.py +++ b/tools/browser_cdp_tool.py @@ -274,7 +274,13 @@ def _browser_cdp_via_supervisor( ) try: - fut = _asyncio.run_coroutine_threadsafe(_do_cdp(), loop) + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe(_do_cdp(), loop) + if fut is None: + return tool_error( + "CDP call via supervisor failed: loop unavailable", + cdp_docs=CDP_DOCS_URL, + ) result_msg = fut.result(timeout=timeout + 2) except Exception as exc: return tool_error( diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py index af8d40ee185..73dd3e51bb5 100644 --- a/tools/browser_supervisor.py +++ b/tools/browser_supervisor.py @@ -368,11 +368,13 @@ class CDPSupervisor: pass try: - fut = asyncio.run_coroutine_threadsafe(_close_ws(), loop) - try: - fut.result(timeout=2.0) - except Exception: - pass + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe(_close_ws(), loop) + if fut is not None: + try: + fut.result(timeout=2.0) + except Exception: + pass except RuntimeError: pass # loop already shutting down if self._thread is not None: @@ -451,7 +453,10 @@ class CDPSupervisor: ) try: - fut = asyncio.run_coroutine_threadsafe(_do_respond(), loop) + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe(_do_respond(), loop) + if fut is None: + return {"ok": False, "error": "Browser supervisor loop unavailable"} fut.result(timeout=timeout) except Exception as e: return {"ok": False, "error": f"{type(e).__name__}: {e}"} @@ -507,7 +512,10 @@ class CDPSupervisor: ) try: - fut = asyncio.run_coroutine_threadsafe(_do_eval(), loop) + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe(_do_eval(), loop) + if fut is None: + return {"ok": False, "error": "Browser supervisor loop unavailable"} response = fut.result(timeout=timeout + 1) except Exception as exc: return {"ok": False, "error": f"{type(exc).__name__}: {exc}"} diff --git 
a/tools/computer_use/cua_backend.py b/tools/computer_use/cua_backend.py index df1162c5d79..96aab60f8c7 100644 --- a/tools/computer_use/cua_backend.py +++ b/tools/computer_use/cua_backend.py @@ -183,9 +183,14 @@ class _AsyncBridge: raise RuntimeError("cua-driver asyncio bridge failed to start") def run(self, coro, timeout: Optional[float] = 30.0) -> Any: + from agent.async_utils import safe_schedule_threadsafe if not self._loop or not self._thread or not self._thread.is_alive(): + if asyncio.iscoroutine(coro): + coro.close() + raise RuntimeError("cua-driver bridge not started") + fut = safe_schedule_threadsafe(coro, self._loop) + if fut is None: raise RuntimeError("cua-driver bridge not started") - fut: Future = asyncio.run_coroutine_threadsafe(coro, self._loop) return fut.result(timeout=timeout) def stop(self) -> None: diff --git a/tools/environments/modal.py b/tools/environments/modal.py index 1a230d85603..3137b322113 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -144,9 +144,14 @@ class _AsyncWorker: self._loop.run_forever() def run_coroutine(self, coro, timeout=600): + from agent.async_utils import safe_schedule_threadsafe if self._loop is None or self._loop.is_closed(): + if asyncio.iscoroutine(coro): + coro.close() + raise RuntimeError("AsyncWorker loop is not running") + future = safe_schedule_threadsafe(coro, self._loop) + if future is None: raise RuntimeError("AsyncWorker loop is not running") - future = asyncio.run_coroutine_threadsafe(coro, self._loop) return future.result(timeout=timeout) def stop(self): diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index c2668395e5d..ba104cc4273 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1781,7 +1781,7 @@ def _handle_auth_error_and_retry( return await manager.handle_401(server_name, None) try: - recovered = _run_on_mcp_loop(_recover(), timeout=10) + recovered = _run_on_mcp_loop(_recover, timeout=10) except Exception as rec_exc: logger.warning( "MCP OAuth '%s': recovery 
attempt failed: %s", @@ -2054,19 +2054,35 @@ def _ensure_mcp_loop(): _mcp_thread.start() -def _run_on_mcp_loop(coro, timeout: float = 30): +def _run_on_mcp_loop(coro_or_factory, timeout: float = 30): """Schedule a coroutine on the MCP event loop and block until done. + Accepts either a coroutine object or a zero-arg callable that returns one. + Callers can pass a factory to avoid constructing coroutine objects when + the MCP loop is unavailable (which would otherwise leak the coroutine + frame and emit ``"coroutine was never awaited"`` warnings). + Poll in short intervals so the calling agent thread can honor user interrupts while the MCP work is still running on the background loop. """ from tools.interrupt import is_interrupted + from agent.async_utils import safe_schedule_threadsafe with _lock: loop = _mcp_loop if loop is None or not loop.is_running(): + if asyncio.iscoroutine(coro_or_factory): + coro_or_factory.close() raise RuntimeError("MCP event loop is not running") - future = asyncio.run_coroutine_threadsafe(coro, loop) + + coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory + future = safe_schedule_threadsafe( + coro, loop, + logger=logger, + log_message="MCP scheduling failed", + ) + if future is None: + raise RuntimeError("MCP event loop unavailable (failed to schedule)") start_time = time.monotonic() deadline = None if timeout is None else start_time + timeout @@ -2263,7 +2279,7 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): return json.dumps({"result": text_result}, ensure_ascii=False) def _call_once(): - return _run_on_mcp_loop(_call(), timeout=tool_timeout) + return _run_on_mcp_loop(_call, timeout=tool_timeout) try: result = _call_once() @@ -2343,7 +2359,7 @@ def _make_list_resources_handler(server_name: str, tool_timeout: float): return json.dumps({"resources": resources}, ensure_ascii=False) def _call_once(): - return _run_on_mcp_loop(_call(), timeout=tool_timeout) + return 
_run_on_mcp_loop(_call, timeout=tool_timeout) try: return _call_once() @@ -2403,7 +2419,7 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float): return json.dumps({"result": "\n".join(parts) if parts else ""}, ensure_ascii=False) def _call_once(): - return _run_on_mcp_loop(_call(), timeout=tool_timeout) + return _run_on_mcp_loop(_call, timeout=tool_timeout) try: return _call_once() @@ -2466,7 +2482,7 @@ def _make_list_prompts_handler(server_name: str, tool_timeout: float): return json.dumps({"prompts": prompts}, ensure_ascii=False) def _call_once(): - return _run_on_mcp_loop(_call(), timeout=tool_timeout) + return _run_on_mcp_loop(_call, timeout=tool_timeout) try: return _call_once() @@ -2537,7 +2553,7 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float): return json.dumps(resp, ensure_ascii=False) def _call_once(): - return _run_on_mcp_loop(_call(), timeout=tool_timeout) + return _run_on_mcp_loop(_call, timeout=tool_timeout) try: return _call_once() @@ -3121,7 +3137,7 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]: if _was_interrupted: _set_interrupt(False) try: - _run_on_mcp_loop(_discover_all(), timeout=120) + _run_on_mcp_loop(_discover_all, timeout=120) finally: if _was_interrupted: _set_interrupt(True) @@ -3289,7 +3305,7 @@ def probe_mcp_server_tools() -> Dict[str, List[tuple]]: ) try: - _run_on_mcp_loop(_probe_all(), timeout=120) + _run_on_mcp_loop(_probe_all, timeout=120) except Exception as exc: logger.debug("MCP probe failed: %s", exc) finally: @@ -3329,11 +3345,17 @@ def shutdown_mcp_servers(): with _lock: loop = _mcp_loop if loop is not None and loop.is_running(): - try: - future = asyncio.run_coroutine_threadsafe(_shutdown(), loop) - future.result(timeout=15) - except Exception as exc: - logger.debug("Error during MCP shutdown: %s", exc) + from agent.async_utils import safe_schedule_threadsafe + future = safe_schedule_threadsafe( + _shutdown(), loop, + logger=logger, + log_message="MCP shutdown: 
failed to schedule", + ) + if future is not None: + try: + future.result(timeout=15) + except Exception as exc: + logger.debug("Error during MCP shutdown: %s", exc) _stop_mcp_loop() diff --git a/tools/slash_confirm.py b/tools/slash_confirm.py index 81c15263527..21db18fe319 100644 --- a/tools/slash_confirm.py +++ b/tools/slash_confirm.py @@ -153,9 +153,14 @@ def resolve_sync_compat( Prefer the async ``resolve()`` from an async context. """ try: - fut = asyncio.run_coroutine_threadsafe( + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe( resolve(session_key, confirm_id, choice), loop, + logger=logger, + log_message="resolve_sync_compat scheduling failed", ) + if fut is None: + return None return fut.result(timeout=30) except Exception as exc: logger.error("resolve_sync_compat failed: %s", exc) diff --git a/tui_gateway/ws.py b/tui_gateway/ws.py index 1661811dbd6..a5879ef3a1c 100644 --- a/tui_gateway/ws.py +++ b/tui_gateway/ws.py @@ -83,7 +83,11 @@ class WSTransport: return True try: - fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop) + from agent.async_utils import safe_schedule_threadsafe + fut = safe_schedule_threadsafe(self._safe_send(line), self._loop) + if fut is None: + self._closed = True + return False fut.result(timeout=_WS_WRITE_TIMEOUT_S) return not self._closed except Exception as exc: From 13c3d4b4efa2f39d7bc3178cf3eca77167ff7699 Mon Sep 17 00:00:00 2001 From: kchantharuan Date: Wed, 13 May 2026 12:46:07 -0700 Subject: [PATCH 055/218] feat(nvidia): add NIM billing origin header --- agent/auxiliary_client.py | 25 +++++++ run_agent.py | 30 ++++++-- tests/agent/test_auxiliary_client.py | 41 +++++++++++ tests/providers/test_provider_profiles.py | 4 ++ .../test_provider_attribution_headers.py | 68 +++++++++++++++++++ 5 files changed, 162 insertions(+), 6 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index cd655e70e56..1c7dd9f7497 100644 --- 
a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict: return headers + +# NVIDIA NIM cloud billing attribution. Keep this host-gated because the +# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL. +_NVIDIA_NIM_CLOUD_HEADERS = { + "X-BILLING-INVOKE-ORIGIN": "HermesAgent", +} + + +def build_nvidia_nim_headers(base_url: str | None) -> dict: + """Return NVIDIA NIM cloud attribution headers for build.nvidia.com traffic.""" + if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"): + return dict(_NVIDIA_NIM_CLOUD_HEADERS) + return {} + + # Vercel AI Gateway app attribution headers. HTTP-Referer maps to # referrerUrl and X-Title maps to appName in the gateway's analytics. from hermes_cli import __version__ as _HERMES_VERSION @@ -1372,6 +1387,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(base_url) else: try: from providers import get_provider_profile as _gpf_aux @@ -1407,6 +1424,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(base_url) else: try: from providers import get_provider_profile as _gpf_aux2 @@ -2690,6 +2709,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False): ) elif base_url_host_matches(sync_base_url, "api.kimi.com"): async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"): + 
async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url) else: # Fall back to profile.default_headers for providers that declare # client-level headers on their ProviderProfile (e.g. attribution @@ -2951,6 +2972,8 @@ def resolve_provider_client( extra["default_headers"] = copilot_request_headers( is_agent_turn=True, is_vision=is_vision ) + elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(custom_base) else: # Fall back to profile.default_headers for providers that # declare client-level attribution headers on their profile. @@ -3149,6 +3172,8 @@ def resolve_provider_client( headers.update(copilot_request_headers( is_agent_turn=True, is_vision=is_vision )) + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + headers.update(build_nvidia_nim_headers(base_url)) else: # Fall back to profile.default_headers for providers that declare # client-level attribution headers on their profile (e.g. 
GMI diff --git a/run_agent.py b/run_agent.py index a82c6417ae1..7e42beb3eba 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1664,6 +1664,9 @@ class AIAgent: if base_url_host_matches(effective_base, "openrouter.ai"): from agent.auxiliary_client import build_or_headers client_kwargs["default_headers"] = build_or_headers() + elif base_url_host_matches(effective_base, "integrate.api.nvidia.com"): + from agent.auxiliary_client import build_nvidia_nim_headers + client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base) elif base_url_host_matches(effective_base, "api.routermint.com"): client_kwargs["default_headers"] = _routermint_headers() elif base_url_host_matches(effective_base, "api.githubcopilot.com"): @@ -1702,9 +1705,15 @@ class AIAgent: } if _provider_timeout is not None: client_kwargs["timeout"] = _provider_timeout - # Preserve any default_headers the router set - if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: - client_kwargs["default_headers"] = dict(_routed_client._default_headers) + # Preserve provider-specific headers the router set. The + # OpenAI SDK stores caller-provided default_headers in + # _custom_headers; older/mocked clients may expose + # _default_headers instead. 
+ _routed_headers = getattr(_routed_client, "_custom_headers", None) + if not _routed_headers: + _routed_headers = getattr(_routed_client, "_default_headers", None) + if _routed_headers: + client_kwargs["default_headers"] = dict(_routed_headers) else: # When the user explicitly chose a non-OpenRouter provider # but no credentials were found, fail fast with a clear @@ -1753,8 +1762,11 @@ class AIAgent: } if _provider_timeout is not None: client_kwargs["timeout"] = _provider_timeout - if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers: - client_kwargs["default_headers"] = dict(_fb_client._default_headers) + _fb_headers = getattr(_fb_client, "_custom_headers", None) + if not _fb_headers: + _fb_headers = getattr(_fb_client, "_default_headers", None) + if _fb_headers: + client_kwargs["default_headers"] = dict(_fb_headers) _fb_resolved = True break if not _fb_resolved: @@ -7334,12 +7346,18 @@ class AIAgent: return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: - from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers + from agent.auxiliary_client import ( + _AI_GATEWAY_HEADERS, + build_nvidia_nim_headers, + build_or_headers, + ) if base_url_host_matches(base_url, "openrouter.ai"): self._client_kwargs["default_headers"] = build_or_headers() elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"): self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS) + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + self._client_kwargs["default_headers"] = build_nvidia_nim_headers(base_url) elif base_url_host_matches(base_url, "api.routermint.com"): self._client_kwargs["default_headers"] = _routermint_headers() elif base_url_host_matches(base_url, "api.githubcopilot.com"): diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c25ca219379..9dd85762956 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ 
-2415,10 +2415,51 @@ def _clean_env(monkeypatch): """Strip provider env vars so each test starts clean.""" for key in ( "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", + "NVIDIA_API_KEY", "NVIDIA_BASE_URL", ): monkeypatch.delenv(key, raising=False) +class TestNvidiaBillingHeaders: + """NVIDIA NIM billing-origin headers are scoped to NVIDIA cloud.""" + + def test_resolve_provider_client_cloud_adds_billing_origin_header(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key") + monkeypatch.delenv("NVIDIA_BASE_URL", raising=False) + mock_openai = MagicMock() + mock_openai.return_value = MagicMock(name="nvidia-client") + + with patch("agent.auxiliary_client.OpenAI", mock_openai): + client, model = resolve_provider_client( + provider="nvidia", + model="nvidia/test-model", + ) + + assert client is not None + assert model == "nvidia/test-model" + call_kwargs = mock_openai.call_args[1] + headers = call_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + def test_resolve_provider_client_local_nim_skips_billing_origin_header(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key") + monkeypatch.setenv("NVIDIA_BASE_URL", "http://localhost:8000/v1") + mock_openai = MagicMock() + mock_openai.return_value = MagicMock(name="nvidia-local-client") + + with patch("agent.auxiliary_client.OpenAI", mock_openai): + client, model = resolve_provider_client( + provider="nvidia", + model="nvidia/test-model", + ) + + assert client is not None + assert model == "nvidia/test-model" + call_kwargs = mock_openai.call_args[1] + headers = call_kwargs.get("default_headers", {}) + assert "X-BILLING-INVOKE-ORIGIN" not in headers + + class TestOpenRouterExplicitApiKey: """Test that explicit_api_key is correctly propagated to _try_openrouter().""" diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py index c79ed2aea9b..df96a80fd80 100644 --- 
a/tests/providers/test_provider_profiles.py +++ b/tests/providers/test_provider_profiles.py @@ -42,6 +42,10 @@ class TestNvidiaProfile: p = get_provider_profile("nvidia") assert "nvidia.com" in p.base_url + def test_billing_header_not_profile_wide(self): + p = get_provider_profile("nvidia") + assert p.default_headers == {} + class TestKimiProfile: def test_temperature_omit(self): diff --git a/tests/run_agent/test_provider_attribution_headers.py b/tests/run_agent/test_provider_attribution_headers.py index 2a1d9088c46..a4ce301a857 100644 --- a/tests/run_agent/test_provider_attribution_headers.py +++ b/tests/run_agent/test_provider_attribution_headers.py @@ -3,6 +3,7 @@ Mirrors the OpenRouter pattern for the Vercel AI Gateway so that referrerUrl / appName / User-Agent flow into gateway analytics. """ +from types import SimpleNamespace from unittest.mock import MagicMock, patch from run_agent import AIAgent @@ -65,6 +66,73 @@ def test_routermint_base_url_applies_user_agent_header(mock_openai): assert headers["User-Agent"].startswith("HermesAgent/") +@patch("run_agent.OpenAI") +def test_nvidia_cloud_base_url_applies_billing_origin_header(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + model="nvidia/test-model", + provider="nvidia", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + assert agent._client_kwargs["default_headers"]["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + agent._apply_client_headers_for_base_url("https://integrate.api.nvidia.com/v1") + + headers = agent._client_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + +@patch("run_agent.OpenAI") +def test_nvidia_local_base_url_does_not_apply_billing_origin_header(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + model="nvidia/test-model", + 
provider="nvidia", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent._client_kwargs["default_headers"] = { + "X-BILLING-INVOKE-ORIGIN": "HermesAgent", + } + + agent._apply_client_headers_for_base_url("http://localhost:8000/v1") + + assert "default_headers" not in agent._client_kwargs + + +@patch("run_agent.OpenAI") +def test_routed_client_preserves_openai_sdk_custom_headers(mock_openai): + mock_openai.return_value = MagicMock() + routed_client = SimpleNamespace( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + _custom_headers={"X-BILLING-INVOKE-ORIGIN": "HermesAgent"}, + ) + + with patch("agent.auxiliary_client.resolve_provider_client", return_value=( + routed_client, + "nvidia/test-model", + )): + agent = AIAgent( + provider="nvidia", + model="nvidia/test-model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + headers = agent._client_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + @patch("run_agent.OpenAI") def test_gmi_base_url_picks_up_profile_user_agent(mock_openai): """GMI declares User-Agent on its ProviderProfile.default_headers. 
From 6fc0fa6e50a2eb6307c1e5afbeff360708b734ef Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:00:13 -0700 Subject: [PATCH 056/218] chore(release): add AUTHOR_MAP entry for kchantharuan@nvidia.com --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index c9cd9c173c0..aafa626329e 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -755,6 +755,7 @@ AUTHOR_MAP = { "zhujianxyz@gmail.com": "opriz", "tuancanhnguyen706@gmail.com": "xxxigm", "asurla@nvidia.com": "anniesurla", + "kchantharuan@nvidia.com": "nv-kasikritc", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", "draixagent@gmail.com": "draix", From 4444d5fe4f65dcbca939a1f39ae58438205e7dad Mon Sep 17 00:00:00 2001 From: HenkDz Date: Fri, 15 May 2026 15:26:08 +0100 Subject: [PATCH 057/218] fix(acp): emit native plan updates for todo --- acp_adapter/events.py | 51 ++++++++++++++++++++++++++++++++++++++++ tests/acp/test_events.py | 32 ++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/acp_adapter/events.py b/acp_adapter/events.py index f0442ca2e8f..828807c3aef 100644 --- a/acp_adapter/events.py +++ b/acp_adapter/events.py @@ -14,6 +14,7 @@ from collections import deque from typing import Any, Callable, Deque, Dict import acp +from acp.schema import AgentPlanUpdate, PlanEntry from .tools import ( build_tool_complete, @@ -24,6 +25,52 @@ from .tools import ( logger = logging.getLogger(__name__) +def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None: + """Translate Hermes' todo tool result into ACP's native plan update. + + Zed renders ``sessionUpdate: plan`` as its first-class task/todo panel. The + Hermes agent already maintains task state through the ``todo`` tool, so the + ACP adapter should expose that state natively instead of only as a generic + tool-call transcript block. 
+ """ + if not isinstance(result, str) or not result.strip(): + return None + + try: + data = json.loads(result) + except Exception: + return None + + if not isinstance(data, dict) or not isinstance(data.get("todos"), list): + return None + + status_map = { + "pending": "pending", + "in_progress": "in_progress", + "completed": "completed", + # ACP plans only support pending/in_progress/completed. Preserve + # cancelled tasks as terminal entries instead of dropping them and + # making the client's full-list replacement lose visible context. + "cancelled": "completed", + } + entries: list[PlanEntry] = [] + for item in data["todos"]: + if not isinstance(item, dict): + continue + content = str(item.get("content") or item.get("id") or "").strip() + if not content: + continue + raw_status = str(item.get("status") or "pending").strip() + status = status_map.get(raw_status, "pending") + if raw_status == "cancelled": + content = f"[cancelled] {content}" + entries.append(PlanEntry(content=content, priority="medium", status=status)) + + if not entries: + return None + return AgentPlanUpdate(session_update="plan", entries=entries) + + def _send_update( conn: acp.Client, session_id: str, @@ -175,6 +222,10 @@ def make_step_cb( snapshot=meta.get("snapshot"), ) _send_update(conn, session_id, loop, update) + if tool_name == "todo": + plan_update = _build_plan_update_from_todo_result(result) + if plan_update is not None: + _send_update(conn, session_id, loop, plan_update) if not queue: tool_call_ids.pop(tool_name, None) diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py index 56a2687226c..ebddf076dbd 100644 --- a/tests/acp/test_events.py +++ b/tests/acp/test_events.py @@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest import acp -from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk +from acp.schema import AgentPlanUpdate, ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk from 
acp_adapter.events import ( _send_update, @@ -296,6 +296,36 @@ class TestStepCallback: } mock_send.assert_called_once() + def test_todo_completion_emits_native_plan_update(self, mock_conn, event_loop_fixture): + from collections import deque + + tool_call_ids = {"todo": deque(["tc-todo"])} + loop = event_loop_fixture + cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {}) + todo_result = ( + '{"todos":[' + '{"id":"inspect","content":"Inspect ACP","status":"completed"},' + '{"id":"patch","content":"Patch renderer","status":"in_progress"},' + '{"id":"old","content":"Drop stale task","status":"cancelled"}' + '],"summary":{"total":3}}' + ) + + with patch("acp_adapter.events._send_update") as mock_send: + cb(1, [{"name": "todo", "result": todo_result}]) + + updates = [call.args[3] for call in mock_send.call_args_list] + plan_updates = [u for u in updates if getattr(u, "session_update", None) == "plan"] + assert len(plan_updates) == 1 + plan = plan_updates[0] + assert isinstance(plan, AgentPlanUpdate) + assert [entry.content for entry in plan.entries] == [ + "Inspect ACP", + "Patch renderer", + "[cancelled] Drop stale task", + ] + assert [entry.status for entry in plan.entries] == ["completed", "in_progress", "completed"] + assert [entry.priority for entry in plan.entries] == ["medium", "medium", "medium"] + # --------------------------------------------------------------------------- # Message callback From bd3a5873e11f084d74be876a505a406224a6ef3e Mon Sep 17 00:00:00 2001 From: HenkDz Date: Fri, 15 May 2026 16:15:04 +0100 Subject: [PATCH 058/218] fix(acp): replay native todo plans --- acp_adapter/events.py | 21 ++++++++++++---- acp_adapter/server.py | 8 ++++++- tests/acp/test_events.py | 27 +++++++++++++++++---- tests/acp/test_server.py | 52 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 9 deletions(-) diff --git a/acp_adapter/events.py b/acp_adapter/events.py index 828807c3aef..00e940b9ee0 100644 --- a/acp_adapter/events.py +++ 
b/acp_adapter/events.py @@ -25,6 +25,17 @@ from .tools import ( logger = logging.getLogger(__name__) +def _json_loads_maybe_prefix(value: str) -> Any: + """Parse a JSON object even when Hermes appended a human hint after it.""" + text = value.strip() + try: + return json.loads(text) + except Exception: + decoder = json.JSONDecoder() + data, _ = decoder.raw_decode(text) + return data + + def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None: """Translate Hermes' todo tool result into ACP's native plan update. @@ -37,13 +48,17 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None: return None try: - data = json.loads(result) + data = _json_loads_maybe_prefix(result) except Exception: return None if not isinstance(data, dict) or not isinstance(data.get("todos"), list): return None + todos = data["todos"] + if not todos: + return AgentPlanUpdate(session_update="plan", entries=[]) + status_map = { "pending": "pending", "in_progress": "in_progress", @@ -54,7 +69,7 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None: "cancelled": "completed", } entries: list[PlanEntry] = [] - for item in data["todos"]: + for item in todos: if not isinstance(item, dict): continue content = str(item.get("content") or item.get("id") or "").strip() @@ -66,8 +81,6 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None: content = f"[cancelled] {content}" entries.append(PlanEntry(content=content, priority="medium", status=status)) - if not entries: - return None return AgentPlanUpdate(session_update="plan", entries=entries) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 20c4d7cdb4f..71fce1890d1 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -59,6 +59,7 @@ from acp.schema import ( from acp_adapter.auth import TERMINAL_SETUP_AUTH_METHOD_ID, build_auth_methods, detect_provider from acp_adapter.events import ( + _build_plan_update_from_todo_result, 
make_message_cb, make_step_cb, make_thinking_cb, @@ -910,15 +911,20 @@ class HermesACPAgent(acp.Agent): if not tool_call_id or not tool_name: continue result = message.get("content") + result_text = result if isinstance(result, str) else None if not await _send( build_tool_complete( tool_call_id, tool_name, - result=result if isinstance(result, str) else None, + result=result_text, function_args=function_args, ) ): return + if tool_name == "todo": + plan_update = _build_plan_update_from_todo_result(result_text) + if plan_update is not None and not await _send(plan_update): + return async def new_session( self, diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py index ebddf076dbd..ec0b32549da 100644 --- a/tests/acp/test_events.py +++ b/tests/acp/test_events.py @@ -12,6 +12,7 @@ import acp from acp.schema import AgentPlanUpdate, ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk from acp_adapter.events import ( + _build_plan_update_from_todo_result, _send_update, make_message_cb, make_step_cb, @@ -296,7 +297,7 @@ class TestStepCallback: } mock_send.assert_called_once() - def test_todo_completion_emits_native_plan_update(self, mock_conn, event_loop_fixture): + def test_todo_completion_emits_native_plan_update_after_tool_completion(self, mock_conn, event_loop_fixture): from collections import deque tool_call_ids = {"todo": deque(["tc-todo"])} @@ -314,9 +315,11 @@ class TestStepCallback: cb(1, [{"name": "todo", "result": todo_result}]) updates = [call.args[3] for call in mock_send.call_args_list] - plan_updates = [u for u in updates if getattr(u, "session_update", None) == "plan"] - assert len(plan_updates) == 1 - plan = plan_updates[0] + assert [getattr(update, "session_update", None) for update in updates] == [ + "tool_call_update", + "plan", + ] + plan = updates[1] assert isinstance(plan, AgentPlanUpdate) assert [entry.content for entry in plan.entries] == [ "Inspect ACP", @@ -326,6 +329,22 @@ class TestStepCallback: assert 
[entry.status for entry in plan.entries] == ["completed", "in_progress", "completed"] assert [entry.priority for entry in plan.entries] == ["medium", "medium", "medium"] + def test_todo_plan_update_parses_json_with_trailing_hint(self): + result = '{"todos":[{"id":"ship","content":"Ship ACP plan","status":"pending"}]}\n\n[Hint: persisted]' + + update = _build_plan_update_from_todo_result(result) + + assert isinstance(update, AgentPlanUpdate) + assert [entry.content for entry in update.entries] == ["Ship ACP plan"] + assert [entry.status for entry in update.entries] == ["pending"] + + def test_todo_plan_update_with_empty_todos_clears_plan(self): + update = _build_plan_update_from_todo_result('{"todos":[],"summary":{"total":0}}') + + assert isinstance(update, AgentPlanUpdate) + assert update.session_update == "plan" + assert update.entries == [] + # --------------------------------------------------------------------------- # Message callback diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 6e2039d2b24..511d6e00934 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -12,6 +12,7 @@ from acp.agent.router import build_agent_router from acp.schema import ( AgentCapabilities, AgentMessageChunk, + AgentPlanUpdate, AuthenticateResponse, AvailableCommandsUpdate, Implementation, @@ -391,6 +392,57 @@ class TestSessionOps: assert "Search results" in tool_updates[1].content[0].content.text assert "cli.py:42" in tool_updates[1].content[0].content.text + @pytest.mark.asyncio + async def test_load_session_replays_native_plan_for_persisted_todo_tool(self, agent): + """Persisted todo tool results should rebuild Zed's native plan panel.""" + mock_conn = MagicMock(spec=acp.Client) + mock_conn.session_update = AsyncMock() + agent._conn = mock_conn + + new_resp = await agent.new_session(cwd="/tmp") + state = agent.session_manager.get_session(new_resp.session_id) + state.history = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { 
+ "id": "call_todo_1", + "type": "function", + "function": { + "name": "todo", + "arguments": '{"todos":[{"id":"ship","content":"Ship it","status":"in_progress"}]}', + }, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_todo_1", + "content": '{"todos":[{"id":"ship","content":"Ship it","status":"in_progress"}]}', + }, + ] + + mock_conn.session_update.reset_mock() + resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id) + await asyncio.sleep(0) + await asyncio.sleep(0) + + assert isinstance(resp, LoadSessionResponse) + relevant_updates = [ + update for update in (call.kwargs["update"] for call in mock_conn.session_update.await_args_list) + if getattr(update, "session_update", None) in {"tool_call", "tool_call_update", "plan"} + ] + assert [getattr(update, "session_update", None) for update in relevant_updates] == [ + "tool_call", + "tool_call_update", + "plan", + ] + plan = relevant_updates[2] + assert isinstance(plan, AgentPlanUpdate) + assert [entry.content for entry in plan.entries] == ["Ship it"] + assert [entry.status for entry in plan.entries] == ["in_progress"] + @pytest.mark.asyncio async def test_resume_session_replays_persisted_history_to_client(self, agent): mock_conn = MagicMock(spec=acp.Client) From 622c27e55c58a0d11739a21ae29dd6d072230cf0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:07:56 -0700 Subject: [PATCH 059/218] fix(install.ps1): restore EAP=Continue around uv python install, skip Store stub (#26586) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fresh Windows installs were failing on first run with: ⚠ uv python install error: Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB) ✗ Installation failed: Python was not found; run without arguments to install from the Microsoft Store... Two bugs compounding: 1) EAP=Stop swallows uv's stderr progress as an exception. 
uv writes download progress ("Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB)") to stderr. With $ErrorActionPreference = "Stop" set at the top of the script plus 2>&1 capture, PowerShell wraps each stderr line as an ErrorRecord and throws on the first one — even though uv exits 0 and Python was installed successfully. This was previously fixed in commit ec1714e71 (May 8) but lost in the May 12 release squash (413990c94). Reapply the EAP=Continue + verify-via 'uv python find' pattern. 2) System-python fallback invokes the Microsoft Store stub. When the uv paths fall through, the legacy 'python --version' check invokes %LOCALAPPDATA%\\Microsoft\\WindowsApps\\python.exe, a 0-byte reparse-point stub that prints 'Python was not found...' to stdout and exits non-zero. Get-Command matches it. The resulting error message is what the user sees as the final installer crash. Detect and skip the stub by checking for the \\WindowsApps\\ path component or a 0-byte file size before invoking python. Also save/restore EAP defensively in the catch blocks so a throw before the assignment can't leave EAP in 'Continue'. --- scripts/install.ps1 | 77 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 2cf81969beb..5ed7aa755fd 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -145,19 +145,39 @@ function Test-Python { # Python not found — use uv to install it (no admin needed!) Write-Info "Python $PythonVersion not found, installing via uv..." try { + # Temporarily relax ErrorActionPreference: uv writes download progress + # ("Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB)") to + # stderr. 
With $ErrorActionPreference = "Stop" (set at the top of this + # script) PowerShell wraps stderr lines from native commands as + # ErrorRecord objects when captured via 2>&1, then throws a terminating + # exception on the first one — even though uv exits 0 and Python was + # installed successfully. Verify success via `uv python find` + # afterwards, which is the reliable signal regardless of exit-code + # semantics or stderr noise. This fix was previously landed as + # commit ec1714e71 and then lost in a release squash; reapplied here. + $prevEAP = $ErrorActionPreference + $ErrorActionPreference = "Continue" $uvOutput = & $UvCmd python install $PythonVersion 2>&1 - if ($LASTEXITCODE -eq 0) { - $pythonPath = & $UvCmd python find $PythonVersion 2>$null - if ($pythonPath) { - $ver = & $pythonPath --version 2>$null - Write-Success "Python installed: $ver" - return $true - } - } else { + $uvExitCode = $LASTEXITCODE + $ErrorActionPreference = $prevEAP + + # Check if Python is now available (more reliable than exit code + # since uv may return non-zero due to "already installed" etc.) + $pythonPath = & $UvCmd python find $PythonVersion 2>$null + if ($pythonPath) { + $ver = & $pythonPath --version 2>$null + Write-Success "Python installed: $ver" + return $true + } + + # uv ran but Python still not findable — show what happened + if ($uvExitCode -ne 0) { Write-Warn "uv python install output:" Write-Host $uvOutput -ForegroundColor DarkGray } } catch { + # Restore EAP in case the try block threw before the assignment + if ($prevEAP) { $ErrorActionPreference = $prevEAP } Write-Warn "uv python install error: $_" } @@ -175,15 +195,42 @@ function Test-Python { } catch { } } - # Fallback: try system python - if (Get-Command python -ErrorAction SilentlyContinue) { - $sysVer = python --version 2>$null - if ($sysVer -match "3\.(1[0-9]|[1-9][0-9])") { - Write-Success "Using system Python: $sysVer" - return $true + # Fallback: try system python — but skip the Microsoft Store stub. 
+ # On Windows, %LOCALAPPDATA%\Microsoft\WindowsApps\python.exe is a 0-byte + # reparse-point stub that prints "Python was not found; run without + # arguments to install from the Microsoft Store..." to stdout and exits + # non-zero. Get-Command finds it; invoking it produces a confusing error + # that the user sees as our installer crashing. + $pythonCmd = Get-Command python -ErrorAction SilentlyContinue + if ($pythonCmd) { + $isStoreStub = $false + try { + $pythonSource = $pythonCmd.Source + if ($pythonSource -and $pythonSource -like "*\WindowsApps\*") { + $isStoreStub = $true + } else { + # Even outside WindowsApps, a 0-byte file is the stub + $item = Get-Item $pythonSource -ErrorAction SilentlyContinue + if ($item -and $item.Length -eq 0) { $isStoreStub = $true } + } + } catch { } + + if (-not $isStoreStub) { + try { + $prevEAP2 = $ErrorActionPreference + $ErrorActionPreference = "Continue" + $sysVer = & python --version 2>&1 + $ErrorActionPreference = $prevEAP2 + if ($sysVer -match "Python 3\.(1[0-9]|[1-9][0-9])") { + Write-Success "Using system Python: $sysVer" + return $true + } + } catch { + if ($prevEAP2) { $ErrorActionPreference = $prevEAP2 } + } } } - + Write-Err "Failed to install Python $PythonVersion" Write-Info "Install Python 3.11 manually, then re-run this script:" Write-Info " https://www.python.org/downloads/" From 3b9368a0c47176b449ea0254cdac31ec4d5ae925 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:27:50 -0700 Subject: [PATCH 060/218] fix(auth): point SSH OAuth users at the tunnel they actually need (#26592) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two loopback-redirect OAuth flows (xAI Grok, Spotify) silently fail when Hermes runs on a remote host: the auth server redirects to 127.0.0.1: on the user's laptop, not on the remote box. The --no-browser flag only suppresses webbrowser.open() — it doesn't change the bind address. 
The symptom xAI surfaces is 'Could not establish connection. We couldn't reach your app.', followed by a 'xAI authorization timed out waiting for the local callback' on the CLI side. Changes - hermes_cli/auth.py: new _print_loopback_ssh_hint() helper, called from _xai_oauth_loopback_login() and login_spotify_command() right after they print the redirect URI. Silent off SSH; on SSH prints the exact 'ssh -N -L <port>:127.0.0.1:<port>' command using the actually-bound port (not the hardcoded constant — the listener auto-bumps when the preferred port is busy), a provider-specific docs URL, and a link to the new shared guide. - website/docs/guides/oauth-over-ssh.md (new): single source of truth for the tunnel pattern — TL;DR command, jump-box / ProxyJump variant, mosh+tmux+ControlMaster gotchas, troubleshooting. - website/docs/guides/xai-grok-oauth.md: fix the two sections that claimed --no-browser alone was enough; link to the shared guide. - website/docs/user-guide/features/spotify.md: expand the existing one-liner; link to the shared guide. - website/sidebars.ts: register the new page. - tests/hermes_cli/test_auth_loopback_ssh_hint.py: 7 unit tests covering SSH-vs-not, loopback-vs-not, malformed URIs, port echo, with and without provider docs URL. 
--- hermes_cli/auth.py | 46 ++++++ .../hermes_cli/test_auth_loopback_ssh_hint.py | 95 ++++++++++++ website/docs/guides/oauth-over-ssh.md | 137 ++++++++++++++++++ website/docs/guides/xai-grok-oauth.md | 23 ++- website/docs/user-guide/features/spotify.md | 8 +- website/sidebars.ts | 1 + 6 files changed, 304 insertions(+), 6 deletions(-) create mode 100644 tests/hermes_cli/test_auth_loopback_ssh_hint.py create mode 100644 website/docs/guides/oauth-over-ssh.md diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index c6dce709384..6cabb61570d 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -107,6 +107,9 @@ DEFAULT_SPOTIFY_REDIRECT_URI = "http://127.0.0.1:43827/spotify/callback" SPOTIFY_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify" SPOTIFY_DASHBOARD_URL = "https://developer.spotify.com/dashboard" SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 + +XAI_OAUTH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth" +OAUTH_OVER_SSH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/oauth-over-ssh" DEFAULT_SPOTIFY_SCOPE = " ".join(( "user-modify-playback-state", "user-read-playback-state", @@ -2528,6 +2531,8 @@ def login_spotify_command(args) -> None: print(f"Full setup guide: {SPOTIFY_DOCS_URL}") print() + _print_loopback_ssh_hint(redirect_uri, docs_url=SPOTIFY_DOCS_URL) + if open_browser and not _is_remote_session(): try: opened = webbrowser.open(authorize_url) @@ -2584,6 +2589,45 @@ def _is_remote_session() -> bool: return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY")) +def _print_loopback_ssh_hint(redirect_uri: str, *, docs_url: str | None = None) -> None: + """Print an SSH tunnel hint when running a loopback-redirect OAuth flow on a + remote host. The auth server (xAI, Spotify, ...) will redirect the user's + browser to ``127.0.0.1:/callback``. 
If the browser is on a different + machine than the loopback listener (the usual SSH case), the redirect can't + reach the listener without a local port forward. + + The hint is best-effort: silent if we don't think we're remote, or if we + can't parse a host/port out of the redirect URI. + + Pass ``docs_url`` for a provider-specific guide (e.g. the xAI Grok OAuth + page); the generic OAuth-over-SSH guide is always shown after it. + """ + if not _is_remote_session(): + return + try: + parsed = urlparse(redirect_uri) + except Exception: + return + host = parsed.hostname or "" + port = parsed.port + if host not in ("127.0.0.1", "::1", "localhost") or not port: + return + print() + print("Remote session detected. Your browser will redirect to") + print(f" {redirect_uri}") + print("which the loopback listener on THIS machine is waiting on. If your") + print("browser is on a different machine, forward the port first from your") + print("local machine in a separate terminal:") + print() + print(f" ssh -N -L {port}:127.0.0.1:{port} @") + print() + print("Then open the authorize URL above in your local browser.") + if docs_url: + print(f"Provider docs: {docs_url}") + print(f"SSH/jump-box guide: {OAUTH_OVER_SSH_DOCS_URL}") + print() + + # ============================================================================= # OpenAI Codex auth — tokens stored in ~/.hermes/auth.json (not ~/.codex/) # @@ -5297,6 +5341,8 @@ def _xai_oauth_loopback_login( print() print(f"Waiting for callback on {redirect_uri}") + _print_loopback_ssh_hint(redirect_uri, docs_url=XAI_OAUTH_DOCS_URL) + if open_browser and not _is_remote_session(): try: opened = webbrowser.open(authorize_url) diff --git a/tests/hermes_cli/test_auth_loopback_ssh_hint.py b/tests/hermes_cli/test_auth_loopback_ssh_hint.py new file mode 100644 index 00000000000..fb88a6bf4ce --- /dev/null +++ b/tests/hermes_cli/test_auth_loopback_ssh_hint.py @@ -0,0 +1,95 @@ +"""Unit tests for _print_loopback_ssh_hint() in hermes_cli/auth.py. 
+ +The helper exists to warn users that loopback OAuth flows (xAI Grok OAuth, +Spotify) don't work over SSH unless they set up an `ssh -L` port forward +between their laptop's browser and the remote host's loopback listener. +""" + +from __future__ import annotations + +import io +import contextlib + +import pytest + +from hermes_cli import auth as auth_mod + + +def _cap(fn): + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + fn() + return buf.getvalue() + + +def test_loopback_ssh_hint_silent_when_not_remote(monkeypatch): + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: False) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "http://127.0.0.1:56121/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL + )) + assert out == "" + + +def test_loopback_ssh_hint_prints_tunnel_command_on_ssh(monkeypatch): + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "http://127.0.0.1:56121/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL + )) + # Must include the exact ssh -L command with the port from the redirect URI + assert "ssh -N -L 56121:127.0.0.1:56121" in out + # Must include the provider-specific docs URL + assert auth_mod.XAI_OAUTH_DOCS_URL in out + # Must always include the cross-provider SSH guide + assert auth_mod.OAUTH_OVER_SSH_DOCS_URL in out + + +def test_loopback_ssh_hint_uses_actual_bound_port(monkeypatch): + """When the preferred port is busy, _xai_start_callback_server falls back to + an OS-assigned port. 
The hint must echo whichever port actually got bound, + not the hardcoded constant.""" + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "http://127.0.0.1:51234/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL + )) + assert "ssh -N -L 51234:127.0.0.1:51234" in out + assert "56121" not in out + + +def test_loopback_ssh_hint_silent_for_non_loopback_uri(monkeypatch): + """Defense in depth: if a future caller passes a non-loopback redirect URI + by mistake, we don't tell the user to forward an external port.""" + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "https://example.com/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL + )) + assert out == "" + + +def test_loopback_ssh_hint_silent_for_malformed_uri(monkeypatch): + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "not-a-uri", docs_url=auth_mod.XAI_OAUTH_DOCS_URL + )) + assert out == "" + + +def test_loopback_ssh_hint_works_without_provider_docs_url(monkeypatch): + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "http://127.0.0.1:43827/spotify/callback" + )) + assert "ssh -N -L 43827:127.0.0.1:43827" in out + # Generic SSH guide is always present even without a provider-specific URL + assert auth_mod.OAUTH_OVER_SSH_DOCS_URL in out + # Should not falsely show "Provider docs:" when no docs_url was passed + assert "Provider docs:" not in out + + +def test_loopback_ssh_hint_accepts_localhost_hostname(monkeypatch): + """The constant is 127.0.0.1, but parsing tolerates `localhost` too in case + a future caller normalizes the URI differently.""" + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + out = _cap(lambda: auth_mod._print_loopback_ssh_hint( + "http://localhost:56121/callback" + )) + assert "ssh 
-N -L 56121:127.0.0.1:56121" in out diff --git a/website/docs/guides/oauth-over-ssh.md b/website/docs/guides/oauth-over-ssh.md new file mode 100644 index 00000000000..46a818a7934 --- /dev/null +++ b/website/docs/guides/oauth-over-ssh.md @@ -0,0 +1,137 @@ +--- +sidebar_position: 17 +title: "OAuth over SSH / Remote Hosts" +description: "How to complete browser-based OAuth (xAI, Spotify) when Hermes runs on a remote machine, container, or behind a jump box" +--- + +# OAuth over SSH / Remote Hosts + +Some Hermes providers — currently **xAI Grok OAuth** and **Spotify** — use a *loopback redirect* OAuth flow. The auth server (xAI, Spotify) redirects your browser to `http://127.0.0.1:/callback` so a tiny HTTP listener started by the `hermes auth ...` command can grab the authorization code. + +This works perfectly when Hermes and your browser are on the same machine. It breaks the moment they aren't: your laptop's browser tries to reach `127.0.0.1` on **your laptop**, but the listener is bound to `127.0.0.1` on **the remote server**. + +The fix is a one-line SSH local-forward. + +## TL;DR + +```bash +# On your local machine (laptop), in a separate terminal: +ssh -N -L 56121:127.0.0.1:56121 user@remote-host + +# In your existing SSH session on the remote machine: +hermes auth add xai-oauth --no-browser +# → Hermes prints an authorize URL. Open it in a browser on your laptop. +# → Your browser redirects to 127.0.0.1:56121/callback, the tunnel forwards +# the request to the remote listener, login completes. +``` + +Port `56121` is what xAI OAuth uses. For Spotify, replace it with `43827`. Hermes prints the exact port it bound to on the `Waiting for callback on ...` line — copy it from there. + +## Which Providers Need This + +| Provider | Loopback port | Tunnel needed? 
| +|----------|---------------|----------------| +| `xai-oauth` (Grok SuperGrok) | `56121` | Yes, when Hermes is remote | +| Spotify | `43827` | Yes, when Hermes is remote | +| `anthropic` (Claude Pro/Max) | n/a | No — paste-the-code flow | +| `openai-codex` (ChatGPT Plus/Pro) | n/a | No — device code flow | +| `minimax`, `nous-portal` | n/a | No — device code flow | + +If your provider isn't in the table, you don't need a tunnel. + +## Why the listener can't just bind 0.0.0.0 + +xAI and Spotify both validate the `redirect_uri` parameter against an allowlist. Both require the loopback form (`http://127.0.0.1:<port>/callback`). Binding the listener to `0.0.0.0` or a different port would cause the auth server to reject the request as a redirect_uri mismatch. The SSH tunnel keeps the loopback URI intact end-to-end. + +## Step-by-step: single SSH hop + +### 1. Start the tunnel from your local machine + +```bash +# xAI Grok OAuth (port 56121) +ssh -N -L 56121:127.0.0.1:56121 user@remote-host + +# Or for Spotify (port 43827) +ssh -N -L 43827:127.0.0.1:43827 user@remote-host +``` + +`-N` means "don't open a remote shell, just hold the tunnel open." Keep this terminal running for the duration of the login. + +### 2. In a separate SSH session, run the auth command + +```bash +ssh user@remote-host +hermes auth add xai-oauth --no-browser +# or for Spotify: +# hermes auth add spotify --no-browser +``` + +Hermes detects the SSH session, skips the browser auto-open, and prints an authorize URL plus a `Waiting for callback on http://127.0.0.1:<port>/callback` line. + +### 3. Open the URL in your local browser + +Copy the authorize URL from the remote terminal and paste it into the browser on your laptop. Approve the consent screen. The auth server redirects to `http://127.0.0.1:<port>/callback`. Your browser hits the tunnel, the request is forwarded to the remote listener, and Hermes prints `Login successful!`. 
+ +You can tear down the tunnel (Ctrl+C in the first terminal) once you see the success line. + +## Step-by-step: through a jump box + +If you reach Hermes through a bastion / jump host, use SSH's built-in `-J` (ProxyJump): + +```bash +ssh -N -L 56121:127.0.0.1:56121 -J jump-user@jump-host user@final-host +``` + +This chains an SSH connection through the jump host without putting the loopback port on the jump box itself. The local `127.0.0.1:56121` on your laptop tunnels straight through to `127.0.0.1:56121` on the final remote host. + +For older OpenSSH that doesn't support `-J`, the long form is: + +```bash +ssh -N \ + -o "ProxyCommand=ssh -W %h:%p jump-user@jump-host" \ + -L 56121:127.0.0.1:56121 \ + user@final-host +``` + +## Mosh, tmux, ssh ControlMaster + +The tunnel is a property of the underlying SSH connection. If you're running Hermes inside `tmux` over a mosh session, the mosh roaming doesn't carry the `-L` forwarding. Open a *separate* plain SSH session **only** for the `-L` tunnel — that's the connection that has to stay alive during the auth flow. Your interactive mosh/tmux session can keep running Hermes normally. + +If you use `ssh -o ControlMaster=auto`, port forwards on a multiplexed connection share the master's lifetime. Restart the master if the tunnel doesn't come up: + +```bash +ssh -O exit user@remote-host +ssh -N -L 56121:127.0.0.1:56121 user@remote-host +``` + +## Troubleshooting + +### `bind [127.0.0.1]:56121: Address already in use` + +Something on your laptop is already using that port. Either the previous tunnel didn't shut down cleanly, or a local Hermes is also listening on it. Find and kill the offender: + +```bash +# macOS / Linux +lsof -iTCP:56121 -sTCP:LISTEN +kill <pid> +``` + +Then retry the `ssh -L` command. + +### "Could not establish connection. We couldn't reach your app." (xAI) + +xAI's authorize page shows this when its redirect to `127.0.0.1:<port>/callback` doesn't reach a listener. 
Either the tunnel isn't running, the port is wrong, or you're using the port Hermes printed in a previous run (the port can be auto-bumped if the preferred one is busy — always read the latest `Waiting for callback on ...` line). + +### `xAI authorization timed out waiting for the local callback` + +Same root cause as above — the redirect never made it back. Check the tunnel is still alive (`ssh -N` doesn't show output, so look at the terminal you started it from), restart it if needed, and re-run `hermes auth add xai-oauth --no-browser`. + +### Tokens land in the wrong `~/.hermes` + +The tokens are written under the Linux user that ran `hermes auth add ...`. If your gateway / systemd service runs as a different user (e.g. `root` or a dedicated `hermes` user), authenticate as **that** user so the tokens land in their `~/.hermes/auth.json`. `sudo -u hermes -i` or equivalent. + +## See Also + +- [xAI Grok OAuth](./xai-grok-oauth.md) +- [Spotify (`Running over SSH`)](../user-guide/features/spotify.md#running-over-ssh--in-a-headless-environment) +- [SSH `-J` / ProxyJump (man page)](https://man.openbsd.org/ssh#J) diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md index 5afccb6d881..95167a2430c 100644 --- a/website/docs/guides/xai-grok-oauth.md +++ b/website/docs/guides/xai-grok-oauth.md @@ -59,14 +59,23 @@ hermes auth add xai-oauth ### Remote / headless sessions -On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser. Open the URL on any device with a browser, complete the consent flow, and Hermes finishes the loopback exchange when the redirect comes back. +On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser. 
-If you need to force this behaviour explicitly: +**Important:** the loopback listener still runs on the remote machine at `127.0.0.1:56121`. The xAI redirect needs to reach *that* listener, so opening the URL on your laptop will fail (`Could not establish connection. We couldn't reach your app.`) unless you forward the port: ```bash +# In a separate terminal on your local machine: +ssh -N -L 56121:127.0.0.1:56121 user@remote-host + +# Then in your SSH session on the remote machine: hermes auth add xai-oauth --no-browser +# Open the printed authorize URL in your local browser. ``` +Through a jump box / bastion: add `-J jump-user@jump-host`. + +See [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md) for the full step-by-step, including ProxyJump chains, mosh/tmux, and ControlMaster gotchas. + ## How the Login Works 1. Hermes opens your browser to `accounts.x.ai`. @@ -182,14 +191,18 @@ Hermes detected that the `state` value returned by the authorization server does ### Logging in from a remote server -On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. Open the URL on any device with a browser and complete the consent there — the loopback callback comes back to your remote host. - -You can also force this behaviour: +On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. The loopback callback listener still binds `127.0.0.1:56121` on the remote host — your laptop's browser can't reach it without an SSH local-forward: ```bash +# Local machine, separate terminal: +ssh -N -L 56121:127.0.0.1:56121 user@remote-host + +# Remote machine: hermes auth add xai-oauth --no-browser ``` +Full walkthrough (jump boxes, mosh/tmux, port conflicts): [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md). + ### "No xAI credentials found" error at runtime The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't logged in yet, or the credential file was deleted. 
diff --git a/website/docs/user-guide/features/spotify.md b/website/docs/user-guide/features/spotify.md index bf9d652b318..5e57688e48f 100644 --- a/website/docs/user-guide/features/spotify.md +++ b/website/docs/user-guide/features/spotify.md @@ -68,7 +68,13 @@ Agree to the terms and click **Save**. On the next page click **Settings** → c ### Running over SSH / in a headless environment -If `SSH_CLIENT` or `SSH_TTY` is set, Hermes skips the automatic browser open during both the wizard and the OAuth step. Copy the dashboard URL and the authorization URL Hermes prints, open them in a browser on your local machine, and proceed normally — the local HTTP listener still runs on the remote host on port 43827. If you need to reach it through an SSH tunnel, forward that port: `ssh -L 43827:127.0.0.1:43827 remote`. +If `SSH_CLIENT` or `SSH_TTY` is set, Hermes skips the automatic browser open during both the wizard and the OAuth step. Copy the dashboard URL and the authorization URL Hermes prints, open them in a browser on your local machine, and proceed normally — the local HTTP listener still runs on the remote host on port `43827`. Your laptop's browser can't reach the remote loopback without an SSH local-forward: + +```bash +ssh -N -L 43827:127.0.0.1:43827 user@remote-host +``` + +For jump-box / bastion setups and other gotchas (mosh, tmux, port conflicts), see [OAuth over SSH / Remote Hosts](../../guides/oauth-over-ssh.md). 
## Verify diff --git a/website/sidebars.ts b/website/sidebars.ts index a0fb24b8c50..f0a0658c3bf 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -192,6 +192,7 @@ const sidebars: SidebarsConfig = { 'guides/aws-bedrock', 'guides/azure-foundry', 'guides/xai-grok-oauth', + 'guides/oauth-over-ssh', 'guides/microsoft-graph-app-registration', 'guides/operate-teams-meeting-pipeline', ], From 518f39557b6753a5dc766a05dd14dd5cf2b9edeb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:32:14 -0700 Subject: [PATCH 061/218] fix(gateway): keep running when platforms fail; add per-platform circuit breaker + /platform (#26600) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop the gateway from exiting (or systemd-restart-looping) when a single messaging adapter fails at startup or runtime. A misconfigured WhatsApp (npm install timeout, unpaired bridge, missing creds.json) used to take the entire gateway down, killing cron jobs and any other connected platforms with it. Changes: • Startup (gateway/run.py): when connected_count==0 but the only errors are retryable, log a degraded-state warning and keep the gateway alive instead of returning False. Reconnect watcher then recovers platforms as their underlying problem clears. • Runtime (gateway/run.py _handle_adapter_fatal_error): when the last adapter goes down with a retryable error and is queued for reconnection, stay alive instead of exit-with-failure. Previously this triggered systemd Restart=on-failure, which created infinite restart loops on persistent retryable failures (proxy outage, repeated bridge crashes). • Reconnect watcher (gateway/run.py _platform_reconnect_watcher): replace the 20-attempt hard drop with a circuit-breaker pause. 
After _PAUSE_AFTER_FAILURES (10) consecutive retryable failures, the platform stays in _failed_platforms with paused=True so the watcher skips it but the operator can still see and resume it. Non-retryable errors still drop out of the queue immediately. Resolves #17063 (gateway giving up on Telegram after 20 attempts). • WhatsApp preflight (gateway/platforms/whatsapp.py): refuse to start the Node bridge when creds.json is missing. Sets a non-retryable whatsapp_not_paired fatal error so the watcher drops it cleanly with a single 'run hermes whatsapp' log line instead of paying the 30s bridge bootstrap timeout on every gateway start. • WhatsApp setup ordering (hermes_cli/main.py cmd_whatsapp): only set WHATSAPP_ENABLED=true once pairing actually succeeds. Previously the wizard wrote the env var at step 2 (before npm install and QR pairing), so any Ctrl+C left .env claiming WhatsApp was ready when the bridge had no creds.json. Also propagate the env var when the user keeps an existing pairing on a re-run. • /platform slash command (hermes_cli/commands.py + gateway/run.py): new gateway-only command for manual circuit-breaker control. /platform list — show connected + failed/paused platforms /platform pause <name> — silence a known-broken platform /platform resume <name> — re-queue a paused platform Tests: • New: pause/resume helpers, /platform list|pause|resume command, WhatsApp creds.json preflight, WhatsApp setup ordering. • Updated: stale assertions that codified the old 'exit and let systemd restart' behavior in test_runner_fatal_adapter.py, test_runner_startup_failures.py, and test_platform_reconnect.py (the 20-attempt give-up test became a circuit-breaker pause test). 5488 tests pass in tests/gateway/.
--- gateway/platforms/whatsapp.py | 34 ++- gateway/run.py | 255 +++++++++++++++--- hermes_cli/commands.py | 2 + hermes_cli/main.py | 27 +- tests/gateway/test_platform_reconnect.py | 230 +++++++++++++++- tests/gateway/test_runner_fatal_adapter.py | 12 +- tests/gateway/test_runner_startup_failures.py | 17 +- tests/gateway/test_whatsapp_connect.py | 90 +++++++ .../test_whatsapp_setup_ordering.py | 140 ++++++++++ 9 files changed, 745 insertions(+), 62 deletions(-) create mode 100644 tests/hermes_cli/test_whatsapp_setup_ordering.py diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 5239df3b5ae..0ca3d41fabb 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -493,13 +493,45 @@ class WhatsAppAdapter(BasePlatformAdapter): """ if not check_whatsapp_requirements(): logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name) + self._set_fatal_error( + "whatsapp_node_missing", + "Node.js is not installed — install Node.js and re-run `hermes gateway`.", + retryable=False, + ) return False bridge_path = Path(self._bridge_script) if not bridge_path.exists(): logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path) + self._set_fatal_error( + "whatsapp_bridge_missing", + f"WhatsApp bridge script missing at {bridge_path}.", + retryable=False, + ) return False - + + # Pre-flight: skip the 30s bridge bootstrap entirely if the user + # never finished pairing. Without creds.json the bridge prints + # QR codes to its log file and never reaches status:connected, + # so every gateway restart paid the 30s timeout + queued WhatsApp + # for indefinite retries. Mark non-retryable so the user gets a + # clear "run hermes whatsapp" message instead of the watcher + # silently hammering an unconfigured platform. + creds_path = self._session_path / "creds.json" + if not creds_path.exists(): + logger.warning( + "[%s] WhatsApp is enabled but not paired (no creds.json at %s). 
" + "Run `hermes whatsapp` to pair, or remove WHATSAPP_ENABLED from " + "your .env to disable.", + self.name, creds_path, + ) + self._set_fatal_error( + "whatsapp_not_paired", + "WhatsApp enabled but not paired — run `hermes whatsapp` to pair.", + retryable=False, + ) + return False + logger.info("[%s] Bridge found at %s", self.name, bridge_path) # Acquire scoped lock to prevent duplicate sessions diff --git a/gateway/run.py b/gateway/run.py index f41357673f7..f9a282a413f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1990,21 +1990,21 @@ class GatewayRunner: await self.stop() elif not self.adapters and self._failed_platforms: # All platforms are down and queued for background reconnection. - # If the error is retryable, exit with failure so systemd Restart=on-failure - # can restart the process. Otherwise stay alive and keep retrying in background. - if adapter.fatal_error_retryable: - self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors" - self._exit_with_failure = True - logger.error( - "All messaging platforms failed with retryable errors. " - "Shutting down gateway for service restart (systemd will retry)." - ) - await self.stop() - else: - logger.warning( - "No connected messaging platforms remain, but %d platform(s) queued for reconnection", - len(self._failed_platforms), - ) + # Keep the gateway alive so: + # • cron jobs still run + # • the reconnect watcher can recover platforms when the + # underlying problem clears (proxy comes back, user runs + # `hermes whatsapp`, etc.) + # We used to exit-with-failure here to trigger systemd restart, + # but that converted a transient outage into a restart loop and + # killed in-process state every time. The reconnect watcher + # already handles long-running recovery — let it do its job. 
+ logger.warning( + "No connected messaging platforms remain, but %d platform(s) " + "queued for reconnection — gateway staying alive, watcher will " + "retry in background.", + len(self._failed_platforms), + ) def _request_clean_exit(self, reason: str) -> None: self._exit_cleanly = True @@ -2180,6 +2180,73 @@ class GatewayRunner: except Exception: pass + # ------------------------------------------------------------------ + # Per-platform circuit breaker (pause/resume) — used by the reconnect + # watcher when a retryable failure recurs past a threshold, and by the + # /platform pause|resume slash command for manual control. + # ------------------------------------------------------------------ + def _pause_failed_platform(self, platform, *, reason: str = "") -> None: + """Mark a queued platform as paused — keep it in ``_failed_platforms`` + but stop the reconnect watcher from hammering it. + + Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive + retryable failures, and by ``/platform pause `` for manual + intervention. Paused platforms are surfaced in ``/platform list`` + and resumed with ``/platform resume ``. + """ + info = getattr(self, "_failed_platforms", {}).get(platform) + if info is None: + return + if info.get("paused"): + return + info["paused"] = True + info["pause_reason"] = reason or "auto-paused after repeated failures" + # Push next_retry far enough out that even if "paused" is missed + # by a stale code path, the watcher won't fire on it. 
+ info["next_retry"] = float("inf") + try: + self._update_platform_runtime_status( + platform.value, + platform_state="paused", + error_code=None, + error_message=info["pause_reason"], + ) + except Exception: + pass + logger.warning( + "%s paused after %d consecutive failures (%s) — " + "fix the underlying issue then run `/platform resume %s` " + "to retry, or `hermes gateway restart` to restart the gateway.", + platform.value, info.get("attempts", 0), + info["pause_reason"], platform.value, + ) + + def _resume_paused_platform(self, platform) -> bool: + """Unpause a platform — reset its attempt counter and schedule an + immediate retry. Returns True if the platform was paused and is + now queued; False if it wasn't paused (or wasn't in the queue). + """ + info = getattr(self, "_failed_platforms", {}).get(platform) + if info is None: + return False + if not info.get("paused"): + return False + info["paused"] = False + info.pop("pause_reason", None) + info["attempts"] = 0 + info["next_retry"] = time.monotonic() # retry on next watcher tick + try: + self._update_platform_runtime_status( + platform.value, + platform_state="retrying", + error_code=None, + error_message=None, + ) + except Exception: + pass + logger.info("%s resumed — retrying on next watcher tick", platform.value) + return True + @staticmethod def _load_prefill_messages() -> List[Dict[str, Any]]: """Load ephemeral prefill messages from config or env var. @@ -3613,16 +3680,32 @@ class GatewayRunner: return True if enabled_platform_count > 0: if startup_retryable_errors: - # At least one platform attempted a connection and failed — - # this is a real startup error that should block the gateway. + # All enabled platforms hit retryable failures (network + # blip, bridge not paired, npm install timeout, etc.). + # Keep the gateway alive so: + # • cron jobs still run + # • the reconnect watcher gets a chance to recover the + # failing platforms once the underlying problem is + # fixed (e.g. 
user runs `hermes whatsapp`, fixes + # proxy, etc.) + # Exiting here used to convert a single misconfigured + # platform into an infinite systemd restart loop. reason = "; ".join(startup_retryable_errors) - logger.error("Gateway failed to connect any configured messaging platform: %s", reason) + logger.warning( + "Gateway started with no connected platforms — " + "%d platform(s) queued for retry: %s", + len(self._failed_platforms), reason, + ) try: from gateway.status import write_runtime_status - write_runtime_status(gateway_state="startup_failed", exit_reason=reason) + write_runtime_status( + gateway_state="degraded", + exit_reason=None, + ) except Exception: pass - return False + # Fall through to the normal "running" state — reconnect + # watcher takes it from here. # All enabled platforms had no adapter (missing library or credentials). # In fleet deployments the same config.yaml is shared across nodes that # may only have credentials for a subset of platforms. Rather than @@ -4737,11 +4820,15 @@ class GatewayRunner: """Background task that periodically retries connecting failed platforms. Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap). - Stops retrying a platform after 20 failed attempts or if the error - is non-retryable (e.g. bad auth token). + Retryable failures keep retrying at the backoff cap indefinitely + — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row + without ever succeeding, it is *paused*: kept in the retry queue + but no longer hammered. The user surfaces it with ``/platform list`` + and resumes it with ``/platform resume ``. Non-retryable + failures (bad auth, etc.) still drop out of the queue immediately. 
""" - _MAX_ATTEMPTS = 20 _BACKOFF_CAP = 300 # 5 minutes max between retries + _PAUSE_AFTER_FAILURES = 10 # circuit-breaker threshold await asyncio.sleep(10) # initial delay — let startup finish while self._running: @@ -4758,22 +4845,18 @@ class GatewayRunner: if not self._running: return info = self._failed_platforms[platform] + # Skip paused platforms entirely — they need explicit + # /platform resume to come back. + if info.get("paused"): + continue if now < info["next_retry"]: continue # not time yet - if info["attempts"] >= _MAX_ATTEMPTS: - logger.warning( - "Giving up reconnecting %s after %d attempts", - platform.value, info["attempts"], - ) - del self._failed_platforms[platform] - continue - platform_config = info["config"] attempt = info["attempts"] + 1 logger.info( - "Reconnecting %s (attempt %d/%d)...", - platform.value, attempt, _MAX_ATTEMPTS, + "Reconnecting %s (attempt %d)...", + platform.value, attempt, ) try: @@ -4838,6 +4921,14 @@ class GatewayRunner: "Reconnect %s failed, next retry in %ds", platform.value, backoff, ) + if attempt >= _PAUSE_AFTER_FAILURES: + self._pause_failed_platform( + platform, + reason=( + adapter.fatal_error_message + or "failed to reconnect" + ), + ) except Exception as e: self._update_platform_runtime_status( platform.value, @@ -4852,6 +4943,8 @@ class GatewayRunner: "Reconnect %s error: %s, next retry in %ds", platform.value, e, backoff, ) + if attempt >= _PAUSE_AFTER_FAILURES: + self._pause_failed_platform(platform, reason=str(e)) # Check every 10 seconds for platforms that need reconnection for _ in range(10): @@ -6451,6 +6544,9 @@ class GatewayRunner: if canonical == "agents": return await self._handle_agents_command(event) + if canonical == "platform": + return await self._handle_platform_command(event) + if canonical == "restart": return await self._handle_restart_command(event) @@ -8698,6 +8794,99 @@ class GatewayRunner: else: return t("gateway.stop.no_active") + async def _handle_platform_command(self, event: 
MessageEvent) -> str: + """Handle ``/platform list|pause|resume [name]`` — surface and + manually control failed/paused gateway adapters. + + Examples: + ``/platform list`` — show connected + failed/paused platforms + ``/platform pause whatsapp`` — stop the reconnect watcher hammering whatsapp + ``/platform resume whatsapp`` — re-queue a paused platform for retry + """ + text = (getattr(event, "content", "") or "").strip() + # Strip the leading "/platform" (or "/PLATFORM") token if present + parts = text.split(maxsplit=2) + if parts and parts[0].lower().lstrip("/").startswith("platform"): + parts = parts[1:] + action = (parts[0] if parts else "list").lower() + target = parts[1].lower() if len(parts) > 1 else "" + + # Resolve platform name (case-insensitive, value match) + def _resolve_platform(name: str): + if not name: + return None + for p in Platform.__members__.values(): + if p.value.lower() == name: + return p + return None + + if action == "list": + lines = ["**Gateway platforms**"] + connected = sorted(p.value for p in self.adapters.keys()) + if connected: + lines.append("Connected: " + ", ".join(connected)) + else: + lines.append("Connected: (none)") + failed = getattr(self, "_failed_platforms", {}) or {} + if failed: + for p, info in failed.items(): + if info.get("paused"): + reason = info.get("pause_reason") or "paused" + lines.append( + f" · {p.value} — PAUSED ({reason}). " + f"Resume with `/platform resume {p.value}`." 
+ ) + else: + attempts = info.get("attempts", 0) + lines.append( + f" · {p.value} — retrying (attempt {attempts})" + ) + else: + lines.append("Failed/paused: (none)") + return "\n".join(lines) + + if action in ("pause", "resume"): + if not target: + return f"Usage: /platform {action} " + platform = _resolve_platform(target) + if platform is None: + return f"Unknown platform: {target}" + failed = getattr(self, "_failed_platforms", {}) or {} + if action == "pause": + if platform not in failed: + return ( + f"{platform.value} is not in the retry queue " + f"(it's either connected or not enabled)." + ) + if failed[platform].get("paused"): + return f"{platform.value} is already paused." + self._pause_failed_platform(platform, reason="paused via /platform pause") + return ( + f"✓ {platform.value} paused. " + f"Resume with `/platform resume {platform.value}` or " + f"`hermes gateway restart` to reset." + ) + # action == "resume" + if platform not in failed: + return ( + f"{platform.value} is not in the retry queue — " + f"nothing to resume." + ) + if not failed[platform].get("paused"): + return ( + f"{platform.value} is already retrying — " + f"no resume needed." + ) + self._resume_paused_platform(platform) + return f"✓ {platform.value} resumed — retrying on next watcher tick." 
+ + return ( + "Usage: /platform [name]\n" + " /platform list — show platform status\n" + " /platform pause — stop retrying a failing platform\n" + " /platform resume — re-queue a paused platform" + ) + async def _handle_restart_command(self, event: MessageEvent) -> Union[str, EphemeralReply]: """Handle /restart command - drain active work, then restart the gateway.""" # Defensive idempotency check: if the previous gateway process diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b3556d3932d..83d86c4a3a9 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -198,6 +198,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[days]"), CommandDef("platforms", "Show gateway/messaging platform status", "Info", cli_only=True, aliases=("gateway",)), + CommandDef("platform", "Pause, resume, or list a failing gateway platform", "Info", + gateway_only=True, args_hint=" [name]"), CommandDef("copy", "Copy the last assistant response to clipboard", "Info", cli_only=True, args_hint="[number]"), CommandDef("paste", "Attach clipboard image from your clipboard", "Info", diff --git a/hermes_cli/main.py b/hermes_cli/main.py index c2c8a6880d2..7eedc3fd322 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1522,14 +1522,18 @@ def cmd_whatsapp(args): ) print(f"\n✓ Mode: {mode_label}") - # ── Step 2: Enable WhatsApp ────────────────────────────────────────── + # ── Step 2: Mode is selected, will enable WhatsApp only after pairing ── + # We intentionally don't write WHATSAPP_ENABLED=true here. If the user + # aborts the wizard later (Ctrl+C, failed npm install, missed QR scan), + # we'd otherwise leave .env claiming WhatsApp is ready when the bridge + # has no creds.json. Every subsequent `hermes gateway` then paid a 30s + # bridge-bootstrap timeout and queued WhatsApp for indefinite retries. + # Now: aborted setup leaves WHATSAPP_ENABLED unset → gateway skips it. 
+ # Re-runs that already have WHATSAPP_ENABLED=true (from a prior + # successful pairing) stay enabled — we just don't write it pre-emptively. print() - current = get_env_value("WHATSAPP_ENABLED") - if current and current.lower() == "true": + if (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true": print("✓ WhatsApp is already enabled") - else: - save_env_value("WHATSAPP_ENABLED", "true") - print("✓ WhatsApp enabled") # ── Step 3: Allowed users ──────────────────────────────────────────── current_users = get_env_value("WHATSAPP_ALLOWED_USERS") or "" @@ -1619,6 +1623,12 @@ def cmd_whatsapp(args): session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") else: + # Existing pairing — ensure WHATSAPP_ENABLED reflects that. + # (Older installs may have lost the env var; covers re-runs + # where the user picked "no, keep my session" but the var + # was never set or got removed.) + if (get_env_value("WHATSAPP_ENABLED") or "").lower() != "true": + save_env_value("WHATSAPP_ENABLED", "true") print("\n✓ WhatsApp is configured and paired!") print(" Start the gateway with: hermes gateway") return @@ -1647,6 +1657,11 @@ def cmd_whatsapp(args): # ── Step 7: Post-pairing ───────────────────────────────────────────── print() if (session_dir / "creds.json").exists(): + # Only enable WhatsApp now that pairing actually succeeded. If the + # user Ctrl+C'd at any earlier step, WHATSAPP_ENABLED stays unset + # and `hermes gateway` skips it cleanly instead of paying a 30s + # bridge timeout + queueing the platform for indefinite retries. 
+ save_env_value("WHATSAPP_ENABLED", "true") print("✓ WhatsApp paired successfully!") print() if wa_mode == "bot": diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py index a0bd7ab9eec..e4362a02562 100644 --- a/tests/gateway/test_platform_reconnect.py +++ b/tests/gateway/test_platform_reconnect.py @@ -294,15 +294,63 @@ class TestPlatformReconnectWatcher: assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2 @pytest.mark.asyncio - async def test_reconnect_gives_up_after_max_attempts(self): - """After max attempts, platform should be removed from retry queue.""" + async def test_reconnect_pauses_after_circuit_breaker_threshold(self): + """After enough consecutive retryable failures, the watcher should + *pause* the platform (keep it in the queue but stop hammering it), + not drop it. The user resumes via /platform resume. + """ + runner = _make_runner() + + platform_config = PlatformConfig(enabled=True, token="test") + # 9 prior attempts — the next failure will be the 10th and should + # trip the circuit breaker. 
+ runner._failed_platforms[Platform.TELEGRAM] = { + "config": platform_config, + "attempts": 9, + "next_retry": time.monotonic() - 1, + } + + fail_adapter = StubAdapter( + succeed=False, fatal_error="DNS failure", fatal_retryable=True + ) + real_sleep = asyncio.sleep + + with patch.object(runner, "_create_adapter", return_value=fail_adapter): + async def run_one_iteration(): + runner._running = True + call_count = 0 + + async def fake_sleep(n): + nonlocal call_count + call_count += 1 + if call_count > 1: + runner._running = False + await real_sleep(0) + + with patch("asyncio.sleep", side_effect=fake_sleep): + await runner._platform_reconnect_watcher() + + await run_one_iteration() + + # Platform stays in queue — paused, not dropped + assert Platform.TELEGRAM in runner._failed_platforms + info = runner._failed_platforms[Platform.TELEGRAM] + assert info["paused"] is True + assert info["attempts"] == 10 + assert "pause_reason" in info + + @pytest.mark.asyncio + async def test_reconnect_skips_paused_platforms(self): + """A paused platform should not be retried by the watcher tick.""" runner = _make_runner() platform_config = PlatformConfig(enabled=True, token="test") runner._failed_platforms[Platform.TELEGRAM] = { "config": platform_config, - "attempts": 20, # At max - "next_retry": time.monotonic() - 1, + "attempts": 10, + "next_retry": time.monotonic() - 1, # would normally retry now + "paused": True, + "pause_reason": "paused via /platform pause", } real_sleep = asyncio.sleep @@ -324,8 +372,10 @@ class TestPlatformReconnectWatcher: await run_one_iteration() - assert Platform.TELEGRAM not in runner._failed_platforms - mock_create.assert_not_called() # Should give up without trying + # Paused platform stays queued and was never touched + assert Platform.TELEGRAM in runner._failed_platforms + assert runner._failed_platforms[Platform.TELEGRAM]["paused"] is True + mock_create.assert_not_called() @pytest.mark.asyncio async def test_reconnect_skips_when_not_time_yet(self): 
@@ -459,11 +509,12 @@ class TestRuntimeDisconnectQueuing: assert Platform.TELEGRAM not in runner._failed_platforms @pytest.mark.asyncio - async def test_retryable_error_exits_for_service_restart_when_all_down(self): - """Gateway should exit with failure when all platforms fail with retryable errors. - - This lets systemd Restart=on-failure restart the process, which is more - reliable than in-process background reconnection after exhausted retries. + async def test_retryable_error_keeps_gateway_alive_when_all_down(self): + """When all adapters fail at runtime with retryable errors, the + gateway should stay alive and let the reconnect watcher recover them + in the background. (Previously this exited-with-failure to trigger + a systemd restart — that converted transient outages into infinite + restart loops and killed in-process state.) """ runner = _make_runner() runner.stop = AsyncMock() @@ -474,9 +525,9 @@ class TestRuntimeDisconnectQueuing: await runner._handle_adapter_fatal_error(adapter) - # stop() SHOULD be called — gateway exits for systemd restart - runner.stop.assert_called_once() - assert runner._exit_with_failure is True + # stop() should NOT be called — gateway stays alive for the watcher + runner.stop.assert_not_called() + assert runner._exit_with_failure is False assert Platform.TELEGRAM in runner._failed_platforms @pytest.mark.asyncio @@ -512,3 +563,154 @@ class TestRuntimeDisconnectQueuing: await runner._handle_adapter_fatal_error(adapter) runner.stop.assert_called_once() + + +# --- Pause / resume circuit breaker --- + + +class TestPauseResume: + """Test the per-platform pause/resume helpers and slash command.""" + + def test_pause_marks_platform_paused(self): + runner = _make_runner() + runner._failed_platforms[Platform.TELEGRAM] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 3, + "next_retry": time.monotonic() + 30, + } + runner._pause_failed_platform(Platform.TELEGRAM, reason="manual") + info = 
runner._failed_platforms[Platform.TELEGRAM] + assert info["paused"] is True + assert info["pause_reason"] == "manual" + assert info["next_retry"] == float("inf") + + def test_pause_is_idempotent(self): + runner = _make_runner() + runner._failed_platforms[Platform.TELEGRAM] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 3, + "next_retry": time.monotonic() + 30, + "paused": True, + "pause_reason": "first reason", + } + runner._pause_failed_platform(Platform.TELEGRAM, reason="second reason") + # Reason should not be overwritten on a second pause call. + assert ( + runner._failed_platforms[Platform.TELEGRAM]["pause_reason"] + == "first reason" + ) + + def test_pause_no_op_when_platform_not_queued(self): + runner = _make_runner() + # No exception even when the platform isn't in _failed_platforms. + runner._pause_failed_platform(Platform.TELEGRAM, reason="x") + assert Platform.TELEGRAM not in runner._failed_platforms + + def test_resume_clears_paused_and_resets_attempts(self): + runner = _make_runner() + runner._failed_platforms[Platform.TELEGRAM] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 10, + "next_retry": float("inf"), + "paused": True, + "pause_reason": "auto-paused", + } + assert runner._resume_paused_platform(Platform.TELEGRAM) is True + info = runner._failed_platforms[Platform.TELEGRAM] + assert info["paused"] is False + assert info["attempts"] == 0 + assert info["next_retry"] != float("inf") + assert "pause_reason" not in info + + def test_resume_returns_false_when_not_paused(self): + runner = _make_runner() + runner._failed_platforms[Platform.TELEGRAM] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 1, + "next_retry": time.monotonic() + 30, + } + assert runner._resume_paused_platform(Platform.TELEGRAM) is False + + def test_resume_returns_false_when_not_queued(self): + runner = _make_runner() + assert runner._resume_paused_platform(Platform.TELEGRAM) is False + + +class 
TestPlatformSlashCommand: + """Test the /platform list|pause|resume slash command handler.""" + + def _make_event(self, content: str): + ev = MagicMock() + ev.content = content + return ev + + @pytest.mark.asyncio + async def test_list_shows_connected_and_paused(self): + runner = _make_runner() + runner.adapters[Platform.DISCORD] = StubAdapter(platform=Platform.DISCORD) + runner._failed_platforms[Platform.WHATSAPP] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 10, + "next_retry": float("inf"), + "paused": True, + "pause_reason": "not paired", + } + out = await runner._handle_platform_command(self._make_event("/platform list")) + assert "discord" in out + assert "whatsapp" in out + assert "PAUSED" in out + assert "not paired" in out + + @pytest.mark.asyncio + async def test_pause_command_pauses_queued_platform(self): + runner = _make_runner() + runner._failed_platforms[Platform.WHATSAPP] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 2, + "next_retry": time.monotonic() + 30, + } + out = await runner._handle_platform_command( + self._make_event("/platform pause whatsapp") + ) + assert "paused" in out.lower() + assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is True + + @pytest.mark.asyncio + async def test_pause_rejects_unqueued_platform(self): + runner = _make_runner() + out = await runner._handle_platform_command( + self._make_event("/platform pause whatsapp") + ) + assert "not in the retry queue" in out + + @pytest.mark.asyncio + async def test_resume_command_resumes_paused_platform(self): + runner = _make_runner() + runner._failed_platforms[Platform.WHATSAPP] = { + "config": PlatformConfig(enabled=True, token="t"), + "attempts": 10, + "next_retry": float("inf"), + "paused": True, + "pause_reason": "x", + } + out = await runner._handle_platform_command( + self._make_event("/platform resume whatsapp") + ) + assert "resumed" in out.lower() + assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is 
False + + @pytest.mark.asyncio + async def test_unknown_platform_name(self): + runner = _make_runner() + out = await runner._handle_platform_command( + self._make_event("/platform pause notarealplatform") + ) + assert "Unknown platform" in out + + @pytest.mark.asyncio + async def test_bare_platform_shows_usage_with_list(self): + # An empty /platform call defaults to "list". + runner = _make_runner() + out = await runner._handle_platform_command(self._make_event("/platform")) + assert "Gateway platforms" in out + diff --git a/tests/gateway/test_runner_fatal_adapter.py b/tests/gateway/test_runner_fatal_adapter.py index 13b9a7d99e8..706514f1ae6 100644 --- a/tests/gateway/test_runner_fatal_adapter.py +++ b/tests/gateway/test_runner_fatal_adapter.py @@ -68,7 +68,11 @@ async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monk @pytest.mark.asyncio async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatch, tmp_path): """Retryable runtime fatal errors queue the platform for reconnection - instead of shutting down the gateway.""" + AND keep the gateway alive — the background reconnect watcher recovers + the platform when the underlying issue clears. (Previously this + exited-with-failure to trigger a systemd restart; that converted + transient failures into infinite restart loops.) 
+ """ config = GatewayConfig( platforms={ Platform.WHATSAPP: PlatformConfig(enabled=True, token="token") @@ -89,8 +93,8 @@ async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatc await runner._handle_adapter_fatal_error(adapter) - # Should shut down with failure — systemd Restart=on-failure will restart - runner.stop.assert_awaited_once() - assert runner._exit_with_failure is True + # Gateway stays alive — watcher will retry in background + runner.stop.assert_not_awaited() + assert runner._exit_with_failure is False assert Platform.WHATSAPP in runner._failed_platforms assert runner._failed_platforms[Platform.WHATSAPP]["attempts"] == 0 diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index fc5c775a779..438553f34ed 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -64,7 +64,14 @@ class _SuccessfulAdapter(BasePlatformAdapter): @pytest.mark.asyncio -async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path): +async def test_runner_stays_alive_for_retryable_startup_errors(monkeypatch, tmp_path): + """Retryable startup errors should leave the gateway running in + degraded mode so the reconnect watcher can recover the platform when + the underlying problem clears. Previously this returned False from + ``start()`` and exited the process, which converted a single broken + platform (e.g. unpaired WhatsApp, DNS blip on Telegram) into a + systemd restart loop and killed cron jobs in the meantime. + """ monkeypatch.setenv("HERMES_HOME", str(tmp_path)) config = GatewayConfig( platforms={ @@ -78,11 +85,13 @@ async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, ok = await runner.start() - assert ok is False + # Gateway stays alive in degraded mode; reconnect watcher takes over. 
+ assert ok is True assert runner.should_exit_cleanly is False state = read_runtime_status() - assert state["gateway_state"] == "startup_failed" - assert "temporary DNS resolution failure" in state["exit_reason"] + assert state["gateway_state"] in {"degraded", "running"} + # Telegram was queued for retry, not given up on. + assert Platform.TELEGRAM in runner._failed_platforms assert state["platforms"]["telegram"]["state"] == "retrying" assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error" diff --git a/tests/gateway/test_whatsapp_connect.py b/tests/gateway/test_whatsapp_connect.py index 0a359fb7511..9d7807734bb 100644 --- a/tests/gateway/test_whatsapp_connect.py +++ b/tests/gateway/test_whatsapp_connect.py @@ -611,3 +611,93 @@ class TestHttpSessionLifecycle: mock_task.cancel.assert_not_called() assert adapter._poll_task is None + + +# --------------------------------------------------------------------------- +# Pre-flight: refuse to start the bridge when creds.json is missing +# --------------------------------------------------------------------------- + + +class TestNoCredsPreflight: + """Verify ``connect()`` fast-fails as non-retryable when WhatsApp is + enabled but the user never finished pairing (no ``creds.json``). + + Without this guard, every gateway boot: + • spawned the bridge subprocess (npm install if needed) + • waited 30s for status:connected (never happens without creds) + • queued WhatsApp for indefinite retries that would just repeat + With the guard, ``connect()`` returns False immediately with a + non-retryable fatal error so the reconnect watcher drops the platform + and the gateway gets a single clear log line telling the user to run + ``hermes whatsapp``. 
+ """ + + @pytest.mark.asyncio + async def test_connect_returns_false_when_no_creds(self, tmp_path): + from gateway.platforms.whatsapp import WhatsAppAdapter + + adapter = WhatsAppAdapter.__new__(WhatsAppAdapter) + adapter.platform = Platform.WHATSAPP + adapter.config = MagicMock() + adapter._bridge_port = 19876 + # Point bridge_script at a real existing file so the earlier + # bridge-missing check doesn't trip — we want to exercise the + # creds.json check specifically. + bridge = tmp_path / "bridge.js" + bridge.write_text("// stub") + adapter._bridge_script = str(bridge) + adapter._session_path = tmp_path / "session" # no creds.json inside + adapter._session_path.mkdir() + adapter._bridge_log_fh = None + adapter._fatal_error_code = None + adapter._fatal_error_message = None + adapter._fatal_error_retryable = True + + with patch( + "gateway.platforms.whatsapp.check_whatsapp_requirements", + return_value=True, + ): + result = await adapter.connect() + + assert result is False + # Non-retryable so the reconnect watcher drops it cleanly + assert adapter._fatal_error_code == "whatsapp_not_paired" + assert adapter._fatal_error_retryable is False + + @pytest.mark.asyncio + async def test_connect_proceeds_when_creds_present(self, tmp_path): + """When creds.json exists, the preflight check is bypassed and + connect() proceeds to the bridge bootstrap path. We don't fully + simulate the bridge here — we just verify no fast-fail occurs. 
+ """ + from gateway.platforms.whatsapp import WhatsAppAdapter + + adapter = WhatsAppAdapter.__new__(WhatsAppAdapter) + adapter.platform = Platform.WHATSAPP + adapter.config = MagicMock() + adapter._bridge_port = 19877 + bridge = tmp_path / "bridge.js" + bridge.write_text("// stub") + adapter._bridge_script = str(bridge) + session_dir = tmp_path / "session" + session_dir.mkdir() + (session_dir / "creds.json").write_text("{}") + adapter._session_path = session_dir + adapter._bridge_log_fh = None + adapter._fatal_error_code = None + adapter._fatal_error_message = None + adapter._fatal_error_retryable = True + # Stub _acquire_platform_lock to return False so connect() exits + # cleanly *after* the preflight, without spawning subprocesses. + adapter._acquire_platform_lock = MagicMock(return_value=False) + + with patch( + "gateway.platforms.whatsapp.check_whatsapp_requirements", + return_value=True, + ): + result = await adapter.connect() + + # Preflight passed — exits because we faked lock acquisition, + # but the fatal-error code is NOT the "not paired" one. + assert result is False + assert adapter._fatal_error_code != "whatsapp_not_paired" diff --git a/tests/hermes_cli/test_whatsapp_setup_ordering.py b/tests/hermes_cli/test_whatsapp_setup_ordering.py new file mode 100644 index 00000000000..47952bcc796 --- /dev/null +++ b/tests/hermes_cli/test_whatsapp_setup_ordering.py @@ -0,0 +1,140 @@ +"""Regression tests for ``cmd_whatsapp`` env-var write ordering. + +Before the fix, ``hermes whatsapp`` wrote ``WHATSAPP_ENABLED=true`` at +step 2 — before npm install (step 4) and before QR pairing (step 6). +If the user Ctrl+C'd at any later step, ``.env`` claimed WhatsApp was +ready when the bridge still had no ``creds.json``. Every subsequent +``hermes gateway`` then paid a 30s bridge-bootstrap timeout and queued +WhatsApp for indefinite retries — looking like "the gateway is broken." 
+ +The fix: only set ``WHATSAPP_ENABLED=true`` once pairing actually +succeeds (creds.json exists). Aborted setup leaves no enabled state. +""" + +from __future__ import annotations + +import io +import os +from contextlib import redirect_stdout +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture +def isolated_home(tmp_path, monkeypatch): + home = tmp_path / "home" + hermes = home / ".hermes" + hermes.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: home) + monkeypatch.setenv("HERMES_HOME", str(hermes)) + # Ensure get_env_value cache doesn't carry stale state. + for key in list(os.environ): + if key.startswith("WHATSAPP_"): + monkeypatch.delenv(key, raising=False) + return hermes + + +def _env_value(hermes_home: Path, key: str) -> str | None: + env_file = hermes_home / ".env" + if not env_file.exists(): + return None + for line in env_file.read_text().splitlines(): + if "=" not in line: + continue + k, _, v = line.partition("=") + if k.strip() == key: + return v.strip().strip('"').strip("'") + return None + + +def test_aborted_setup_does_not_enable_whatsapp(isolated_home, monkeypatch): + """User picks mode 1, then Ctrl+C's at the allowed-users prompt. + + WHATSAPP_ENABLED must NOT be present in .env after abort. + """ + from hermes_cli.main import cmd_whatsapp + + # First input() = mode choice, second input() = allowed-users prompt + # We raise KeyboardInterrupt on the second call to simulate abort. + inputs = iter(["1"]) + + def fake_input(_prompt=""): + try: + return next(inputs) + except StopIteration: + raise KeyboardInterrupt + + monkeypatch.setattr("builtins.input", fake_input) + # _require_tty calls sys.stdin.isatty — make it pass. + monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None) + # No node, no bridge script — we shouldn't reach those steps anyway. 
+ + buf = io.StringIO() + with redirect_stdout(buf): + try: + cmd_whatsapp(MagicMock()) + except KeyboardInterrupt: + pass + + assert _env_value(isolated_home, "WHATSAPP_ENABLED") is None, ( + "Setup aborted before pairing — WHATSAPP_ENABLED must not be set. " + f"Got .env: {(isolated_home / '.env').read_text() if (isolated_home / '.env').exists() else '(missing)'}" + ) + + +def test_existing_pairing_skip_branch_enables_whatsapp(isolated_home, monkeypatch): + """User runs ``hermes whatsapp`` with an existing paired session and + chooses "no, keep my session" at the re-pair prompt. The env var + should be (re-)written to true so the gateway picks WhatsApp back up, + even if the var was lost since the original pairing. + """ + from hermes_cli.main import cmd_whatsapp + + # Pre-create a paired session WITHOUT WHATSAPP_ENABLED in .env. + session = isolated_home / "whatsapp" / "session" + session.mkdir(parents=True) + (session / "creds.json").write_text("{}") + monkeypatch.setenv("WHATSAPP_MODE", "bot") + monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15551234567") + + # mode already set → skip mode prompt; users already set → skip update + # prompt with "no"; pairing exists → "no, keep session" → return. + inputs = iter(["n", "n"]) + + def fake_input(_prompt=""): + try: + return next(inputs) + except StopIteration: + return "n" + + monkeypatch.setattr("builtins.input", fake_input) + monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None) + # Skip the bridge npm install — we're testing setup-ordering, not bridge + # bootstrapping. Pretend node_modules exists (Path.exists -> True for that + # specific check is hard to scope, so instead pretend npm install would + # succeed silently if reached). 
+ monkeypatch.setattr( + "subprocess.run", + lambda *_a, **_kw: MagicMock(returncode=0, stderr=""), + ) + monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/npm") + # Patch (bridge_dir / "node_modules").exists() by stubbing Path.exists + # to True for that one specific subpath. Easier: pre-create it as a + # symlink to /tmp. But we can't write to the repo. Instead, stub + # Path.exists wholesale to True for node_modules; the creds.json check + # in the same function still works because we wrote it ourselves. + _orig_exists = Path.exists + def _stub_exists(self): + if self.name == "node_modules": + return True + return _orig_exists(self) + monkeypatch.setattr(Path, "exists", _stub_exists) + + buf = io.StringIO() + with redirect_stdout(buf): + cmd_whatsapp(MagicMock()) + + # The skip-re-pair branch should have set the env var on its way out. + assert _env_value(isolated_home, "WHATSAPP_ENABLED") == "true" From 032fb842225dedf5e6649489f81631465f1aa809 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:44:27 -0700 Subject: [PATCH 062/218] docs(hermes_tools_mcp_server): align scope docstring with EXPOSED_TOOLS (#26603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The top-of-file scope docstring listed delegate_task, memory, and session_search as exposed tools, but EXPOSED_TOOLS deliberately omits them (they're _AGENT_LOOP_TOOLS and require the running AIAgent context to dispatch — the inline comment block already explains this). Kanban tools, which ARE exposed, were missing from the docstring entirely. Rewrite the Scope / DO NOT expose sections to match the actual tuple: drop delegate_task/memory/session_search from 'expose', add the kanban_* family, move delegate_task/memory/session_search/todo into 'DO NOT expose' with the agent-loop rationale.
Fixes #26567 (doc-only fix; option 2 — shimming memory/session_search through MemoryStore/SessionDB directly — left for a follow-up issue once the plugin-memory locking story is audited). --- agent/transports/hermes_tools_mcp_server.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/agent/transports/hermes_tools_mcp_server.py b/agent/transports/hermes_tools_mcp_server.py index f7f8ae24887..37f2d6179d1 100644 --- a/agent/transports/hermes_tools_mcp_server.py +++ b/agent/transports/hermes_tools_mcp_server.py @@ -14,20 +14,28 @@ the user gets full Hermes capability inside a Codex turn. Scope (what we expose): - web_search, web_extract — Firecrawl, no codex equivalent - browser_navigate / _click / _type / — Camofox/Browserbase automation - _snapshot / _screenshot / _scroll / _back / _press / _vision - - delegate_task — Hermes subagents + _snapshot / _scroll / _back / _press / + _get_images / _console / _vision - vision_analyze — image inspection by vision model - image_generate — image generation - - memory — Hermes' persistent memory store - skill_view, skills_list — Hermes' skill library - - session_search — cross-session search - text_to_speech — TTS + - kanban_* (complete/block/comment/ — kanban worker + orchestrator + heartbeat/show/list/create/ handoff (stateless: read env var, + unblock/link) write ~/.hermes/kanban.db) -What we DO NOT expose (codex has equivalents): +What we DO NOT expose: - terminal / shell — codex's own shell tool - read_file / write_file / patch — codex's apply_patch + shell - search_files / process — codex's shell - - clarify, todo — codex's own UX + - clarify — codex's own UX + - delegate_task / memory / — `_AGENT_LOOP_TOOLS` in Hermes + session_search / todo (model_tools.py). They require + the running AIAgent context to + dispatch (mid-loop state), so a + stateless MCP callback can't + drive them. See the inline + comment on EXPOSED_TOOLS below. 
Run with: python -m agent.transports.hermes_tools_mcp_server Spawned by: CodexAppServerSession.ensure_started() when the runtime is From 3215ef160938c71ff61bab279b30545c0cc14a14 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 11:59:22 +0000 Subject: [PATCH 063/218] ci(pypi): build web dashboard + TUI bundle before creating wheel --- .github/workflows/upload_to_pypi.yml | 21 +++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml index 4e2fe4748d3..ae68ed034a1 100644 --- a/.github/workflows/upload_to_pypi.yml +++ b/.github/workflows/upload_to_pypi.yml @@ -50,6 +50,27 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6 + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Build web dashboard + run: cd web && npm ci && npm run build + + - name: Build TUI bundle + run: cd ui-tui && npm ci && npm run build + + - name: Bundle TUI into hermes_cli + run: | + mkdir -p hermes_cli/tui_dist + cp ui-tui/dist/entry.js hermes_cli/tui_dist/entry.js + + - name: Verify frontend assets exist + run: | + test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; } + test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; } + - name: Build wheel and sdist run: uv build --sdist --wheel diff --git a/pyproject.toml b/pyproject.toml index ae2fff385a3..87674601db0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -210,7 +210,7 @@ hermes-acp = "acp_adapter.entry:main" py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"] [tool.setuptools.package-data] -hermes_cli = ["web_dist/**/*"] +hermes_cli = ["web_dist/**/*", "tui_dist/**/*"] gateway = 
["assets/**/*"] acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"] From 384ec9684e86081c4add84d671d2bbf7c8ee69d4 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:00:07 +0000 Subject: [PATCH 064/218] feat(banner): check PyPI for updates when not a git install For pip-installed hermes-agent (no .git directory), fall back to querying PyPI's JSON API to compare __version__ against the latest published release, using stdlib only (urllib + json, no packaging dep). --- hermes_cli/banner.py | 48 +++++++++++++++++++++- tests/hermes_cli/test_banner_pip_update.py | 35 ++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 tests/hermes_cli/test_banner_pip_update.py diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 036412ac072..061992b4746 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -175,6 +175,49 @@ def _check_via_local_git(repo_dir: Path) -> Optional[int]: return None +def _version_tuple(v: str) -> tuple[int, ...]: + """Parse '0.13.0' into (0, 13, 0) for comparison. Non-numeric segments become 0.""" + parts = [] + for segment in v.split("."): + try: + parts.append(int(segment)) + except ValueError: + parts.append(0) + return tuple(parts) + + +def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]: + """Fetch the latest version of a package from PyPI. Returns None on failure.""" + try: + import urllib.request + import json as _json + url = f"https://pypi.org/pypi/{package}/json" + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=5) as resp: + data = _json.loads(resp.read()) + return data.get("info", {}).get("version") + except Exception: + return None + + +def _check_via_pypi() -> Optional[int]: + """Compare installed version against PyPI latest. + + Returns 0 if up-to-date, 1 if behind, None on failure. 
+ """ + latest = _fetch_pypi_latest() + if latest is None: + return None + if latest == VERSION: + return 0 + try: + if _version_tuple(latest) > _version_tuple(VERSION): + return 1 + return 0 + except Exception: + return 1 if latest != VERSION else 0 + + def check_for_updates() -> Optional[int]: """Check whether a Hermes update is available. @@ -213,8 +256,9 @@ def check_for_updates() -> Optional[int]: if not (repo_dir / ".git").exists(): repo_dir = hermes_home / "hermes-agent" if not (repo_dir / ".git").exists(): - return None - behind = _check_via_local_git(repo_dir) + behind = _check_via_pypi() + else: + behind = _check_via_local_git(repo_dir) try: cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev})) diff --git a/tests/hermes_cli/test_banner_pip_update.py b/tests/hermes_cli/test_banner_pip_update.py new file mode 100644 index 00000000000..a0e9266f698 --- /dev/null +++ b/tests/hermes_cli/test_banner_pip_update.py @@ -0,0 +1,35 @@ +from unittest.mock import patch + + +def test_check_via_pypi_detects_update(): + """_check_via_pypi returns 1 when PyPI has newer version.""" + from hermes_cli.banner import _check_via_pypi + with patch("hermes_cli.banner.VERSION", "0.12.0"): + with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"): + result = _check_via_pypi() + assert result == 1 + + +def test_check_via_pypi_up_to_date(): + """_check_via_pypi returns 0 when versions match.""" + from hermes_cli.banner import _check_via_pypi + with patch("hermes_cli.banner.VERSION", "0.13.0"): + with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"): + result = _check_via_pypi() + assert result == 0 + + +def test_check_via_pypi_network_failure(): + """_check_via_pypi returns None on network error.""" + from hermes_cli.banner import _check_via_pypi + with patch("hermes_cli.banner._fetch_pypi_latest", return_value=None): + result = _check_via_pypi() + assert result is None + + +def test_version_tuple_comparison(): + 
"""Version comparison works with multi-segment versions.""" + from hermes_cli.banner import _version_tuple + assert _version_tuple("0.13.0") > _version_tuple("0.12.0") + assert _version_tuple("0.13.0") == _version_tuple("0.13.0") + assert _version_tuple("1.0.0") > _version_tuple("0.99.99") From cc07e30f45267c00fac97ea5569c606aca5a1ffb Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:00:28 +0000 Subject: [PATCH 065/218] feat(install): add --ensure and --postinstall modes for targeted dep bootstrap Adds --ensure DEPS for pip-runtime dep installation and --postinstall for pip users who want the full post-install experience without cloning. --- scripts/install.sh | 106 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index 9c5db6b1c08..9b1b7469bb8 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -71,6 +71,8 @@ USE_VENV=true RUN_SETUP=true SKIP_BROWSER=false BRANCH="main" +ENSURE_DEPS="" +POSTINSTALL_MODE=false # Detect non-interactive mode (e.g. curl | bash) # When stdin is not a terminal, read -p will fail with EOF, @@ -109,6 +111,14 @@ while [[ $# -gt 0 ]]; do HERMES_HOME="$2" shift 2 ;; + --ensure) + ENSURE_DEPS="$2" + shift 2 + ;; + --postinstall) + POSTINSTALL_MODE=true + shift + ;; -h|--help) echo "Hermes Agent Installer" echo "" @@ -133,6 +143,12 @@ while [[ $# -gt 0 ]]; do echo " (default /root/.hermes). This keeps Docker bind-mounted volumes" echo " small and ensures the command is on PATH for all shells." echo " Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place." 
+ echo " --ensure DEPS Install only specified deps (comma-separated)" + echo " Supported: node, browser, ripgrep, ffmpeg" + echo " Does NOT clone repo or create venv" + echo " --postinstall Run post-install setup only (for pip users)" + echo " Installs optional deps + runs hermes setup" + echo " Does NOT clone repo or create venv" exit 0 ;; *) @@ -1872,6 +1888,88 @@ print_success() { fi } +ensure_mode() { + detect_os + + IFS=',' read -ra DEPS <<< "$ENSURE_DEPS" + for dep in "${DEPS[@]}"; do + dep="$(echo "$dep" | tr -d '[:space:]')" + case "$dep" in + node) + check_node + ;; + browser) + check_node + if [ "$HAS_NODE" = true ]; then + DETECTED_BROWSER_EXECUTABLE="$(find_system_browser 2>/dev/null || true)" + if [ -z "$DETECTED_BROWSER_EXECUTABLE" ]; then + log_info "Installing agent-browser + Chromium..." + npm_bin="$(command -v npm 2>/dev/null || echo "")" + if [ -n "$npm_bin" ]; then + local agent_browser_dir="$HERMES_HOME/node_modules" + mkdir -p "$agent_browser_dir" + "$npm_bin" install --prefix "$HERMES_HOME" agent-browser 2>/dev/null || true + npx playwright install chromium 2>/dev/null || true + fi + else + log_success "System browser found: $DETECTED_BROWSER_EXECUTABLE" + fi + fi + ;; + ripgrep) + if ! command -v rg &>/dev/null; then + HAS_RIPGREP=false + HAS_FFMPEG=true + install_system_packages + fi + ;; + ffmpeg) + if ! command -v ffmpeg &>/dev/null; then + HAS_FFMPEG=false + HAS_RIPGREP=true + install_system_packages + fi + ;; + *) + log_warn "Unknown dependency: $dep" + ;; + esac + done +} + +postinstall_mode() { + print_banner + detect_os + + log_info "Post-install mode: setting up Hermes for pip install" + + check_node + check_network_prerequisites + install_system_packages + + if [ "$HAS_NODE" = true ] && [ "$SKIP_BROWSER" = false ]; then + DETECTED_BROWSER_EXECUTABLE="$(find_system_browser 2>/dev/null || true)" + if [ -z "$DETECTED_BROWSER_EXECUTABLE" ]; then + log_info "Installing browser engine..." 
+ npm_bin="$(command -v npm 2>/dev/null || echo "")" + if [ -n "$npm_bin" ]; then + npx playwright install chromium 2>/dev/null || true + fi + else + log_success "System browser found: $DETECTED_BROWSER_EXECUTABLE" + fi + fi + + HERMES_CMD="$(command -v hermes 2>/dev/null || echo "")" + if [ -n "$HERMES_CMD" ]; then + log_info "Running hermes setup..." + "$HERMES_CMD" setup + else + log_warn "hermes command not found on PATH" + log_info "Try: python -m hermes_cli.main setup" + fi +} + # ============================================================================ # Main # ============================================================================ @@ -1900,4 +1998,10 @@ main() { print_success } -main +if [ -n "$ENSURE_DEPS" ]; then + ensure_mode +elif [ "$POSTINSTALL_MODE" = true ]; then + postinstall_mode +else + main +fi From c4bda3f27c033f33eef824efc3e689119bfbee72 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:00:54 +0000 Subject: [PATCH 066/218] fix(doctor): generate config from defaults when template file is missing When cli-config.yaml.example is not present (e.g. pip wheel install), fall back to writing DEFAULT_CONFIG via save_config() instead of warning and requiring a manual fix. 
--- hermes_cli/doctor.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index c2035b03e6e..bf5a8865909 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -656,15 +656,17 @@ def run_doctor(args): if fallback_config.exists(): check_ok("cli-config.yaml exists (in project directory)") else: - example_config = PROJECT_ROOT / 'cli-config.yaml.example' - if should_fix and example_config.exists(): + if should_fix: config_path.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(str(example_config), str(config_path)) - check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example") + example_config = PROJECT_ROOT / 'cli-config.yaml.example' + if example_config.exists(): + shutil.copy2(str(example_config), str(config_path)) + check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example") + else: + from hermes_cli.config import DEFAULT_CONFIG, save_config + save_config(DEFAULT_CONFIG) + check_ok(f"Created {_DHH}/config.yaml from defaults") fixed_count += 1 - elif should_fix: - check_warn("config.yaml not found and no example to copy from") - manual_issues.append(f"Create {_DHH}/config.yaml manually") else: check_warn("config.yaml not found", "(using defaults)") From d69eab1efd96a4622e6b00fbb806d1cd049b3589 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:01:31 +0000 Subject: [PATCH 067/218] fix(gateway): build service PATH from existing dirs only, include ~/.hermes/node_modules Extract PATH building into _build_service_path_dirs() that skips directories which don't exist on disk (e.g. node_modules/.bin for pip installs) and also includes ~/.hermes/node/bin and ~/.hermes/node_modules/.bin for agent-browser. 
--- hermes_cli/gateway.py | 38 +++++++++++++++---- .../hermes_cli/test_gateway_service_paths.py | 31 +++++++++++++++ 2 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 tests/hermes_cli/test_gateway_service_paths.py diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index b0cb579daa8..a865bcaf8be 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2103,15 +2103,41 @@ def _hermes_home_for_target_user(target_home_dir: str) -> str: return str(current_hermes) +def _build_service_path_dirs(project_root: Path | None = None) -> list[str]: + """Build PATH directory list for service units, excluding non-existent dirs.""" + if project_root is None: + project_root = PROJECT_ROOT + + candidates = [] + + venv_bin = project_root / "venv" / "bin" + if venv_bin.is_dir(): + candidates.append(str(venv_bin)) + elif sys.prefix != sys.base_prefix: + candidates.append(str(Path(sys.prefix) / "bin")) + + node_bin = project_root / "node_modules" / ".bin" + if node_bin.is_dir(): + candidates.append(str(node_bin)) + + hermes_home = get_hermes_home() + hermes_node = hermes_home / "node" / "bin" + if hermes_node.is_dir(): + candidates.append(str(hermes_node)) + hermes_nm = hermes_home / "node_modules" / ".bin" + if hermes_nm.is_dir(): + candidates.append(str(hermes_nm)) + + return candidates + + def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str: python_path = get_python_path() working_dir = str(PROJECT_ROOT) detected_venv = _detect_venv_dir() venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv") - venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin") - node_bin = str(PROJECT_ROOT / "node_modules" / ".bin") - path_entries = [venv_bin, node_bin] + path_entries = _build_service_path_dirs() resolved_node = shutil.which("node") if resolved_node: resolved_node_dir = str(Path(resolved_node).resolve().parent) @@ -2138,8 +2164,6 @@ def 
generate_systemd_unit(system: bool = False, run_as_user: str | None = None) python_path = _remap_path_for_user(python_path, home_dir) working_dir = _remap_path_for_user(working_dir, home_dir) venv_dir = _remap_path_for_user(venv_dir, home_dir) - venv_bin = _remap_path_for_user(venv_bin, home_dir) - node_bin = _remap_path_for_user(node_bin, home_dir) path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries] path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries)) path_entries.extend(_build_wsl_interop_paths(path_entries)) @@ -2754,12 +2778,10 @@ def generate_launchd_plist() -> str: # the systemd unit), then capture the user's full shell PATH so every # user-installed tool (node, ffmpeg, …) is reachable. detected_venv = _detect_venv_dir() - venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin") venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv") - node_bin = str(PROJECT_ROOT / "node_modules" / ".bin") # Resolve the directory containing the node binary (e.g. Homebrew, nvm) # so it's explicitly in PATH even if the user's shell PATH changes later. 
- priority_dirs = [venv_bin, node_bin] + priority_dirs = _build_service_path_dirs() resolved_node = shutil.which("node") if resolved_node: resolved_node_dir = str(Path(resolved_node).resolve().parent) diff --git a/tests/hermes_cli/test_gateway_service_paths.py b/tests/hermes_cli/test_gateway_service_paths.py new file mode 100644 index 00000000000..71abc4aef24 --- /dev/null +++ b/tests/hermes_cli/test_gateway_service_paths.py @@ -0,0 +1,31 @@ +from pathlib import Path +from unittest.mock import patch + + +def test_service_path_skips_nonexistent_node_modules(tmp_path): + """Service PATH should not include node_modules/.bin if it doesn't exist.""" + from hermes_cli.gateway import _build_service_path_dirs + with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"): + dirs = _build_service_path_dirs(project_root=tmp_path) + node_modules_bin = str(tmp_path / "node_modules" / ".bin") + assert node_modules_bin not in dirs + + +def test_service_path_includes_node_modules_when_present(tmp_path): + """Service PATH should include node_modules/.bin when it exists.""" + nm_bin = tmp_path / "node_modules" / ".bin" + nm_bin.mkdir(parents=True) + from hermes_cli.gateway import _build_service_path_dirs + with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"): + dirs = _build_service_path_dirs(project_root=tmp_path) + assert str(nm_bin) in dirs + + +def test_service_path_includes_hermes_home_node_modules(tmp_path): + """Service PATH should include ~/.hermes/node_modules/.bin when it exists.""" + hermes_nm = tmp_path / ".hermes" / "node_modules" / ".bin" + hermes_nm.mkdir(parents=True) + from hermes_cli.gateway import _build_service_path_dirs + with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"): + dirs = _build_service_path_dirs(project_root=tmp_path) + assert str(hermes_nm) in dirs From b2bf658442f413a9a1d24b011589e5e38544947e Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 
12:02:23 +0000 Subject: [PATCH 068/218] feat(tui): find bundled entry.js from wheel before falling back to npm build Add _find_bundled_tui() that checks for hermes_cli/tui_dist/entry.js (present in wheel installs) and wire it into _make_tui_argv() between the HERMES_TUI_DIR prebuilt path and the npm install fallback. --- hermes_cli/main.py | 14 ++++++++++++++ tests/hermes_cli/test_tui_bundled.py | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/hermes_cli/test_tui_bundled.py diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 7eedc3fd322..1324ff8e8e7 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1024,6 +1024,14 @@ def _ensure_tui_node() -> None: os.environ["PATH"] = os.pathsep.join(parts) +def _find_bundled_tui(hermes_cli_dir: Path | None = None) -> Path | None: + """Find a pre-built TUI entry.js bundled in the wheel.""" + if hermes_cli_dir is None: + hermes_cli_dir = Path(__file__).parent + bundled = hermes_cli_dir / "tui_dist" / "entry.js" + return bundled if bundled.is_file() else None + + def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: """TUI: --dev → tsx src; else node dist (HERMES_TUI_DIR prebuilt or esbuild).""" _ensure_tui_node() @@ -1058,6 +1066,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: node = _node_bin("node") return [node, str(p / "dist" / "entry.js")], p + # 1b. Bundled in wheel (pip install) + bundled = _find_bundled_tui() + if bundled is not None: + node = _node_bin("node") + return [node, str(bundled)], bundled.parent + # 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js. # --dev flow: npm install if needed, then tsx src/entry.tsx (no build). 
if _tui_need_npm_install(tui_dir): diff --git a/tests/hermes_cli/test_tui_bundled.py b/tests/hermes_cli/test_tui_bundled.py new file mode 100644 index 00000000000..c49443a3f76 --- /dev/null +++ b/tests/hermes_cli/test_tui_bundled.py @@ -0,0 +1,21 @@ +from pathlib import Path + + +def test_tui_finds_bundled_entry_js(tmp_path): + """_find_bundled_tui finds entry.js bundled in the package.""" + tui_dist = tmp_path / "hermes_cli" / "tui_dist" + tui_dist.mkdir(parents=True) + entry = tui_dist / "entry.js" + entry.write_text("// bundled TUI", encoding="utf-8") + + from hermes_cli.main import _find_bundled_tui + result = _find_bundled_tui(hermes_cli_dir=tmp_path / "hermes_cli") + assert result is not None + assert result.name == "entry.js" + + +def test_tui_returns_none_when_no_bundle(tmp_path): + """_find_bundled_tui returns None when no bundle exists.""" + from hermes_cli.main import _find_bundled_tui + result = _find_bundled_tui(hermes_cli_dir=tmp_path / "hermes_cli") + assert result is None From 624ce11ee846b57b59ca2e031f34e25813137c4d Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:02:44 +0000 Subject: [PATCH 069/218] feat(config): detect pip install method and recommend correct update command Adds detect_install_method() to identify nixos/homebrew/git/pip installs, and recommended_update_command_for_method() to return the right upgrade command for each method. Updates recommended_update_command() to use these for pip-installed instances (no .git dir, not managed). 
--- hermes_cli/config.py | 33 ++++++++++++++++- .../hermes_cli/test_pip_install_detection.py | 37 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 tests/hermes_cli/test_pip_install_detection.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index a560e1e6a1e..10dd7b46412 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -199,9 +199,40 @@ def get_managed_update_command() -> Optional[str]: return None +def detect_install_method(project_root: Optional[Path] = None) -> str: + """Detect how Hermes was installed: 'nixos', 'homebrew', 'git', or 'pip'.""" + managed = get_managed_system() + if managed: + return managed.lower().replace(" ", "-") + if project_root is None: + project_root = Path(__file__).parent.parent.resolve() + if (project_root / ".git").is_dir(): + return "git" + return "pip" + + +def recommended_update_command_for_method(method: str) -> str: + """Return the update command for a given install method.""" + if method == "nixos": + return "sudo nixos-rebuild switch" + if method == "homebrew": + return "brew upgrade hermes-agent" + if method == "pip": + import shutil + uv = shutil.which("uv") + if uv: + return "uv pip install --upgrade hermes-agent" + return "pip install --upgrade hermes-agent" + return "hermes update" + + def recommended_update_command() -> str: """Return the best update command for the current installation.""" - return get_managed_update_command() or "hermes update" + managed_cmd = get_managed_update_command() + if managed_cmd: + return managed_cmd + method = detect_install_method() + return recommended_update_command_for_method(method) def format_managed_message(action: str = "modify this Hermes installation") -> str: diff --git a/tests/hermes_cli/test_pip_install_detection.py b/tests/hermes_cli/test_pip_install_detection.py new file mode 100644 index 00000000000..b0f4cbd75ad --- /dev/null +++ b/tests/hermes_cli/test_pip_install_detection.py @@ -0,0 +1,37 @@ +from pathlib import 
Path +from unittest.mock import patch + + +def test_pip_install_detected_when_no_git_dir(tmp_path): + """When PROJECT_ROOT has no .git, detect as pip install.""" + with patch("hermes_cli.config.get_managed_system", return_value=None): + from hermes_cli.config import detect_install_method + method = detect_install_method(project_root=tmp_path) + assert method == "pip" + + +def test_git_install_detected_when_git_dir_exists(tmp_path): + """When PROJECT_ROOT has .git, detect as git install.""" + (tmp_path / ".git").mkdir() + with patch("hermes_cli.config.get_managed_system", return_value=None): + from hermes_cli.config import detect_install_method + method = detect_install_method(project_root=tmp_path) + assert method == "git" + + +def test_managed_install_takes_precedence(tmp_path): + """When HERMES_MANAGED is set, that takes precedence over git detection.""" + (tmp_path / ".git").mkdir() + with patch("hermes_cli.config.get_managed_system", return_value="NixOS"): + from hermes_cli.config import detect_install_method + method = detect_install_method(project_root=tmp_path) + assert method == "nixos" + + +def test_recommended_update_command_pip(): + """Pip installs recommend pip install --upgrade.""" + from hermes_cli.config import recommended_update_command_for_method + cmd = recommended_update_command_for_method("pip") + assert "pip install" in cmd or "uv pip install" in cmd + assert "--upgrade" in cmd + assert "hermes-agent" in cmd From 79afa50703d18f91fb7878a7b7a31b425ab40382 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:04:34 +0000 Subject: [PATCH 070/218] feat(update): support pip install --upgrade for PyPI installs When .git is absent and detect_install_method returns "pip", fork hermes update to run `uv pip install --upgrade hermes-agent` (or `python -m pip install --upgrade hermes-agent` as fallback) instead of hard-exiting with "Not a git repository". 
--- hermes_cli/main.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1324ff8e8e7..ea050126736 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7671,6 +7671,29 @@ def cmd_update(args): _finalize_update_output(_update_io_state) +def _cmd_update_pip(args): + """Update Hermes via pip (for PyPI installs).""" + import subprocess as _sp + from hermes_cli import __version__ + + print(f"→ Current version: {__version__}") + print("→ Checking PyPI for updates...") + + uv = shutil.which("uv") + if uv: + cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"] + else: + cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"] + + print(f"→ Running: {' '.join(cmd)}") + result = _sp.run(cmd) + if result.returncode != 0: + print("✗ Update failed") + sys.exit(1) + + print("✓ Update complete! Restart hermes to use the new version.") + + def _cmd_update_impl(args, gateway_mode: bool): """Body of ``cmd_update`` — kept separate so the wrapper can always restore stdio even on ``sys.exit``.""" @@ -7698,6 +7721,11 @@ def _cmd_update_impl(args, gateway_mode: bool): if sys.platform == "win32": use_zip_update = True else: + from hermes_cli.config import detect_install_method + method = detect_install_method(PROJECT_ROOT) + if method == "pip": + _cmd_update_pip(args) + return print("✗ Not a git repository. Please reinstall:") print( " curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash" From bea96e5cac3caf12885056fbc3a400cb5c008540 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:05:12 +0000 Subject: [PATCH 071/218] chore(config): expand ensure_hermes_home to create full directory scaffold Match the full set of subdirs created by install.sh: pairing, hooks, image_cache, audio_cache, and skills are now pre-created alongside the existing cron, sessions, logs, logs/curator, and memories dirs. 
This makes hermes doctor checks cleaner without changing any runtime behaviour. --- hermes_cli/config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 10dd7b46412..508de0d3faa 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -432,7 +432,10 @@ def ensure_hermes_home(): else: home.mkdir(parents=True, exist_ok=True) _secure_dir(home) - for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"): + for subdir in ( + "cron", "sessions", "logs", "logs/curator", "memories", + "pairing", "hooks", "image_cache", "audio_cache", "skills", + ): d = home / subdir d.mkdir(parents=True, exist_ok=True) _secure_dir(d) From 259ae846c8ae1b84d4cbd2cb1d62c6eefd81957f Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:06:05 +0000 Subject: [PATCH 072/218] feat: add ensure_dependency() wrapper + ship install.sh in wheel Includes paired change: browser tool now searches ~/.hermes/node_modules/.bin/ for agent-browser installed via install.sh --ensure browser. 
--- .github/workflows/upload_to_pypi.yml | 5 ++ hermes_cli/dep_ensure.py | 96 ++++++++++++++++++++++++++++ pyproject.toml | 2 +- tests/hermes_cli/test_dep_ensure.py | 43 +++++++++++++ tools/browser_tool.py | 3 +- 5 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 hermes_cli/dep_ensure.py create mode 100644 tests/hermes_cli/test_dep_ensure.py diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml index ae68ed034a1..9dce018d690 100644 --- a/.github/workflows/upload_to_pypi.yml +++ b/.github/workflows/upload_to_pypi.yml @@ -71,6 +71,11 @@ jobs: test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; } test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; } + - name: Bundle install.sh into wheel + run: | + mkdir -p hermes_cli/scripts + cp scripts/install.sh hermes_cli/scripts/install.sh + - name: Build wheel and sdist run: uv build --sdist --wheel diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py new file mode 100644 index 00000000000..03ddd80ef84 --- /dev/null +++ b/hermes_cli/dep_ensure.py @@ -0,0 +1,96 @@ +"""Lazy dependency bootstrapper for non-Python runtime deps. + +Wraps install.sh --ensure to install node, browser, ripgrep, ffmpeg +on first use. Prompts interactively unless told not to. 
+""" +from __future__ import annotations + +import os +import shutil +import subprocess +import sys +from pathlib import Path + +_DEP_CHECKS = { + "node": lambda: shutil.which("node") is not None, + "browser": lambda: ( + shutil.which("agent-browser") is not None + or _has_system_browser() + or _has_hermes_agent_browser() + ), + "ripgrep": lambda: shutil.which("rg") is not None, + "ffmpeg": lambda: shutil.which("ffmpeg") is not None, +} + +_DEP_DESCRIPTIONS = { + "node": "Node.js (required for browser tools and TUI)", + "browser": "Browser engine (Chromium, for web browsing tools)", + "ripgrep": "ripgrep (fast file search)", + "ffmpeg": "ffmpeg (TTS voice messages)", +} + + +def _has_system_browser() -> bool: + for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"): + if shutil.which(name): + return True + return False + + +def _has_hermes_agent_browser() -> bool: + hermes_home = os.environ.get("HERMES_HOME", str(Path.home() / ".hermes")) + return (Path(hermes_home) / "node_modules" / ".bin" / "agent-browser").is_file() + + +def _find_install_script( + package_dir: Path | None = None, + repo_root: Path | None = None, +) -> Path | None: + """Locate install.sh — bundled in wheel or in git checkout.""" + if package_dir is None: + package_dir = Path(__file__).parent + if repo_root is None: + repo_root = package_dir.parent + + bundled = package_dir / "scripts" / "install.sh" + if bundled.is_file(): + return bundled + repo = repo_root / "scripts" / "install.sh" + if repo.is_file(): + return repo + return None + + +def ensure_dependency(dep: str, interactive: bool = True) -> bool: + """Ensure a non-Python dependency is available. 
Returns True if available.""" + check = _DEP_CHECKS.get(dep) + if check and check(): + return True + + script = _find_install_script() + if script is None: + if interactive: + desc = _DEP_DESCRIPTIONS.get(dep, dep) + print(f" {desc} is not installed and install.sh was not found.") + print(f" Install {dep} manually and try again.") + return False + + if interactive and sys.stdin.isatty(): + desc = _DEP_DESCRIPTIONS.get(dep, dep) + try: + reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower() + except (EOFError, KeyboardInterrupt): + return False + if reply not in ("", "y", "yes"): + return False + + result = subprocess.run( + ["bash", str(script), "--ensure", dep], + env={**os.environ, "IS_INTERACTIVE": "false"}, + ) + if result.returncode != 0: + return False + + if check: + return check() + return True diff --git a/pyproject.toml b/pyproject.toml index 87674601db0..fff11f6a5d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -210,7 +210,7 @@ hermes-acp = "acp_adapter.entry:main" py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"] [tool.setuptools.package-data] -hermes_cli = ["web_dist/**/*", "tui_dist/**/*"] +hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh"] gateway = ["assets/**/*"] acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"] diff --git a/tests/hermes_cli/test_dep_ensure.py b/tests/hermes_cli/test_dep_ensure.py new file mode 100644 index 00000000000..c980c290099 --- /dev/null +++ b/tests/hermes_cli/test_dep_ensure.py @@ -0,0 +1,43 @@ +from pathlib import Path +from unittest.mock import patch + + +def test_ensure_dependency_skips_when_present(): + """ensure_dependency is a no-op when the dep is already available.""" + from hermes_cli.dep_ensure import ensure_dependency + with patch("hermes_cli.dep_ensure.shutil") as mock_shutil: + 
mock_shutil.which.return_value = "/usr/bin/node" + result = ensure_dependency("node", interactive=False) + assert result is True + + +def test_ensure_dependency_returns_false_when_missing_noninteractive(): + """ensure_dependency returns False for missing dep in non-interactive mode.""" + from hermes_cli.dep_ensure import ensure_dependency + with patch("hermes_cli.dep_ensure.shutil") as mock_shutil: + mock_shutil.which.return_value = None + with patch("hermes_cli.dep_ensure._find_install_script", return_value=None): + result = ensure_dependency("node", interactive=False) + assert result is False + + +def test_find_install_script_from_checkout(tmp_path): + """_find_install_script finds scripts/install.sh in a git checkout.""" + from hermes_cli.dep_ensure import _find_install_script + scripts_dir = tmp_path / "scripts" + scripts_dir.mkdir() + (scripts_dir / "install.sh").write_text("#!/bin/bash", encoding="utf-8") + result = _find_install_script(package_dir=tmp_path / "hermes_cli", repo_root=tmp_path) + assert result is not None + assert result.name == "install.sh" + + +def test_find_install_script_from_wheel(tmp_path): + """_find_install_script finds bundled install.sh in a wheel.""" + from hermes_cli.dep_ensure import _find_install_script + bundled = tmp_path / "hermes_cli" / "scripts" + bundled.mkdir(parents=True) + (bundled / "install.sh").write_text("#!/bin/bash", encoding="utf-8") + result = _find_install_script(package_dir=tmp_path / "hermes_cli", repo_root=tmp_path) + assert result is not None + assert result.name == "install.sh" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 575beba6c02..c01d25a6f0b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -144,7 +144,8 @@ def _browser_candidate_path_dirs() -> list[str]: """Return ordered browser CLI PATH candidates shared by discovery and execution.""" hermes_home = get_hermes_home() hermes_node_bin = str(hermes_home / "node" / "bin") - return [hermes_node_bin, 
*list(_discover_homebrew_node_dirs()), *_SANE_PATH_DIRS] + hermes_nm_bin = str(hermes_home / "node_modules" / ".bin") + return [hermes_node_bin, hermes_nm_bin, *list(_discover_homebrew_node_dirs()), *_SANE_PATH_DIRS] def _merge_browser_path(existing_path: str = "") -> str: From 96917fb74ae4b9857671f7addb957db0774e4c9f Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 12:38:04 +0000 Subject: [PATCH 073/218] =?UTF-8?q?refactor:=20fix=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20remove=20duplicate=20imports=20and=20deduplicate=20?= =?UTF-8?q?update=20command?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - banner.py: remove redundant `import json as _json` (json already at module level) - main.py: _cmd_update_pip now delegates to recommended_update_command_for_method instead of duplicating the uv-vs-pip detection logic - main.py: remove redundant `import subprocess as _sp` (subprocess already at module level) --- hermes_cli/banner.py | 3 +-- hermes_cli/main.py | 13 ++++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 061992b4746..077ee41f0a2 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -190,11 +190,10 @@ def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]: """Fetch the latest version of a package from PyPI. 
Returns None on failure.""" try: import urllib.request - import json as _json url = f"https://pypi.org/pypi/{package}/json" req = urllib.request.Request(url, headers={"Accept": "application/json"}) with urllib.request.urlopen(req, timeout=5) as resp: - data = _json.loads(resp.read()) + data = json.loads(resp.read()) return data.get("info", {}).get("version") except Exception: return None diff --git a/hermes_cli/main.py b/hermes_cli/main.py index ea050126736..95947641aa5 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7673,20 +7673,19 @@ def cmd_update(args): def _cmd_update_pip(args): """Update Hermes via pip (for PyPI installs).""" - import subprocess as _sp from hermes_cli import __version__ + from hermes_cli.config import recommended_update_command_for_method print(f"→ Current version: {__version__}") print("→ Checking PyPI for updates...") - uv = shutil.which("uv") - if uv: - cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"] - else: - cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"] + cmd_str = recommended_update_command_for_method("pip") + cmd = cmd_str.split() + if cmd[0] == "pip": + cmd = [sys.executable, "-m", "pip"] + cmd[1:] print(f"→ Running: {' '.join(cmd)}") - result = _sp.run(cmd) + result = subprocess.run(cmd) if result.returncode != 0: print("✗ Update failed") sys.exit(1) From 55a7c45d379f288fb6dc0eb4e484e82b73471b2c Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:01:37 +0000 Subject: [PATCH 074/218] fix(update): handle --check for pip installs (missed code path) _cmd_update_check() had its own `.git` gate separate from _cmd_update_impl. For pip installs, fork to _check_via_pypi() and display the result with the correct recommended_update_command(). 
--- hermes_cli/main.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 95947641aa5..bb372c396f1 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7396,6 +7396,19 @@ def _cmd_update_check(): """Implement ``hermes update --check``: fetch and report without installing.""" git_dir = PROJECT_ROOT / ".git" if not git_dir.exists(): + from hermes_cli.config import detect_install_method, recommended_update_command + if detect_install_method(PROJECT_ROOT) == "pip": + from hermes_cli.banner import _check_via_pypi + result = _check_via_pypi() + if result is None: + print("✗ Could not reach PyPI to check for updates.") + sys.exit(1) + elif result == 0: + print("✓ Already up to date.") + else: + print(f"⚕ Update available on PyPI.") + print(f" Run '{recommended_update_command()}' to install.") + return print("✗ Not a git repository — cannot check for updates.") sys.exit(1) From e38a478c05e84f7fe563a1c9e980a0cebc8e4d02 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:22:21 +0000 Subject: [PATCH 075/218] chore(ci): pin actions/setup-node to SHA for supply-chain consistency --- .github/workflows/upload_to_pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml index 9dce018d690..95477ccf01f 100644 --- a/.github/workflows/upload_to_pypi.yml +++ b/.github/workflows/upload_to_pypi.yml @@ -51,7 +51,7 @@ jobs: uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6 - name: Set up Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: '22' From c57709a3d68e7972bbc7180a1d6811f5f38546d1 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:28:21 +0000 Subject: [PATCH 076/218] feat: wire ensure_dependency into TUI and browser tool call sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Before: missing node → hard exit; missing browser → FileNotFoundError. After: both try ensure_dependency() first, which prompts interactively and delegates installation to install.sh --ensure. ripgrep and ffmpeg already degrade gracefully (grep fallback, skip conversion) so they don't need wiring. Also documents the design rationale in dep_ensure.py: detection and prompting live in Python (portable, instant, UX-integrated); only the actual installation delegates to install.sh (1900 lines of battle-tested OS/package-manager logic). --- hermes_cli/dep_ensure.py | 14 ++++++++++++-- hermes_cli/main.py | 7 +++++++ tools/browser_tool.py | 18 +++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py index 03ddd80ef84..c06fc6db80a 100644 --- a/hermes_cli/dep_ensure.py +++ b/hermes_cli/dep_ensure.py @@ -1,7 +1,17 @@ """Lazy dependency bootstrapper for non-Python runtime deps. -Wraps install.sh --ensure to install node, browser, ripgrep, ffmpeg -on first use. Prompts interactively unless told not to. +Detection and prompting live here in Python — not in install.sh — because: + 1. shutil.which() works on every platform; install.sh needs bash. + 2. Detection is instant; spawning bash for a "is node installed?" check is waste. + 3. Python controls the UX (rich prompts, non-interactive fallback, TTY detection). + +install.sh is still the *installation* backend because it has 1900 lines of +battle-tested OS detection and package-manager logic (apt/brew/pacman/dnf/ +zypper/Termux/…). Reimplementing that in Python would be huge duplication. + +Deps that degrade gracefully (ripgrep → grep fallback, ffmpeg → skip conversion) +don't need ensure_dependency wired in — only hard-fail sites do (TUI needs node, +browser tool needs agent-browser). 
""" from __future__ import annotations diff --git a/hermes_cli/main.py b/hermes_cli/main.py index bb372c396f1..0b5e79fe9d9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1042,6 +1042,13 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK): return env_node path = shutil.which(bin) + if not path and bin == "node": + try: + from hermes_cli.dep_ensure import ensure_dependency + if ensure_dependency("node"): + path = shutil.which("node") + except Exception: + pass if not path: print(f"{bin} not found — install Node.js to use the TUI.") sys.exit(1) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index c01d25a6f0b..b3eb24ee044 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1703,7 +1703,23 @@ def _find_agent_browser() -> str: _agent_browser_resolved = True return _cached_agent_browser - # Nothing found — cache the failure so subsequent calls don't re-scan. + # Nothing found — try lazy installation before giving up. + try: + from hermes_cli.dep_ensure import ensure_dependency + if ensure_dependency("browser"): + recheck = shutil.which("agent-browser") + if not recheck and extended_path: + recheck = shutil.which("agent-browser", path=extended_path) + if not recheck: + hermes_nm = str(get_hermes_home() / "node_modules" / ".bin") + recheck = shutil.which("agent-browser", path=hermes_nm) + if recheck: + _cached_agent_browser = recheck + _agent_browser_resolved = True + return recheck + except Exception: + pass + _agent_browser_resolved = True raise FileNotFoundError( "agent-browser CLI not found. 
Install it with: " From b1edf3dfc8948b5ff93f42d26395fa6f30393d9f Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:33:35 +0000 Subject: [PATCH 077/218] chore: gitignore hermes_cli/scripts/ (bundled at wheel build time) --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6ae86265a60..37b1f602cc9 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,6 @@ mini-swe-agent/ result website/static/api/skills-index.json models-dev-upstream/ +hermes_cli/tui_dist/* +hermes_cli/scripts/ +docs/superpowers/* \ No newline at end of file From 99b81cd54b99d4c66812b1d076e593f566432065 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:43:20 +0000 Subject: [PATCH 078/218] feat: add `hermes postinstall` command for pip users One-shot bootstrap that installs non-Python deps (node, browser, ripgrep, ffmpeg) via ensure_dependency(), then runs setup if no provider is configured. Closes the gap between `pip install` and the full user-facing experience. 
Also fixes 3 pre-existing test regressions caused by earlier commits: - test_recommended_update_command: mock detect_install_method for git env - test_check_for_updates_no_git_dir: now falls back to PyPI, not None - test_plist_path_includes_node_modules_bin: skip when dir absent --- hermes_cli/main.py | 31 ++++++++++++++++++- tests/hermes_cli/test_managed_installs.py | 3 +- tests/hermes_cli/test_update_check.py | 7 +++-- .../hermes_cli/test_update_gateway_restart.py | 5 ++- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 0b5e79fe9d9..121b77b0f91 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1713,6 +1713,24 @@ def cmd_setup(args): run_setup_wizard(args) +def cmd_postinstall(args): + """One-shot bootstrap for pip users: install non-Python deps + run setup.""" + from hermes_cli.dep_ensure import ensure_dependency + + print("⚕ Hermes post-install bootstrap") + print() + + for dep in ("node", "browser", "ripgrep", "ffmpeg"): + ensure_dependency(dep) + + if not _has_any_provider_configured(): + print() + cmd_setup(args) + else: + print() + print("✓ Post-install complete.") + + def cmd_model(args): """Select default model — starts with provider selection, then model picker.""" _require_tty("model") @@ -9583,7 +9601,7 @@ _BUILTIN_SUBCOMMANDS = frozenset( "config", "cron", "curator", "dashboard", "debug", "doctor", "dump", "fallback", "gateway", "hooks", "import", "insights", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", - "model", "pairing", "plugins", "profile", "proxy", "sessions", "setup", + "model", "pairing", "plugins", "postinstall", "profile", "proxy", "sessions", "setup", "skills", "slack", "status", "tools", "uninstall", "update", "version", "webhook", "whatsapp", "chat", # Help-ish invocations — plugin commands not being listed in @@ -10022,6 +10040,17 @@ def main(): ) setup_parser.set_defaults(func=cmd_setup) + # 
========================================================================= + # postinstall command + # ========================================================================= + postinstall_parser = subparsers.add_parser( + "postinstall", + help="Bootstrap non-Python deps for pip installs (node, browser, ripgrep, ffmpeg)", + description="One-shot post-install for pip users. Installs system " + "dependencies that pip cannot provide, then runs setup if needed.", + ) + postinstall_parser.set_defaults(func=cmd_postinstall) + # ========================================================================= # whatsapp command # ========================================================================= diff --git a/tests/hermes_cli/test_managed_installs.py b/tests/hermes_cli/test_managed_installs.py index c6b5d792ce0..d2cf2947c6d 100644 --- a/tests/hermes_cli/test_managed_installs.py +++ b/tests/hermes_cli/test_managed_installs.py @@ -29,7 +29,8 @@ def test_format_managed_message_homebrew(monkeypatch): def test_recommended_update_command_defaults_to_hermes_update(monkeypatch): monkeypatch.delenv("HERMES_MANAGED", raising=False) - assert recommended_update_command() == "hermes update" + with patch("hermes_cli.config.detect_install_method", return_value="git"): + assert recommended_update_command() == "hermes update" def test_cmd_update_blocks_managed_homebrew(monkeypatch, capsys): diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py index 2bdc9b24621..92cd2d2e14c 100644 --- a/tests/hermes_cli/test_update_check.py +++ b/tests/hermes_cli/test_update_check.py @@ -59,7 +59,7 @@ def test_check_for_updates_expired_cache(tmp_path, monkeypatch): def test_check_for_updates_no_git_dir(tmp_path, monkeypatch): - """Returns None when .git directory doesn't exist anywhere.""" + """Falls back to PyPI check when .git directory doesn't exist anywhere.""" import hermes_cli.banner as banner # Create a fake banner.py so the fallback path also has no .git @@ 
-70,8 +70,9 @@ def test_check_for_updates_no_git_dir(tmp_path, monkeypatch): monkeypatch.setattr(banner, "__file__", str(fake_banner)) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) with patch("hermes_cli.banner.subprocess.run") as mock_run: - result = banner.check_for_updates() - assert result is None + with patch("hermes_cli.banner._check_via_pypi", return_value=0): + result = banner.check_for_updates() + assert result == 0 mock_run.assert_not_called() diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 34c878eca79..b53b1463624 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -178,8 +178,11 @@ class TestLaunchdPlistPath: raise AssertionError("PATH key not found in plist") def test_plist_path_includes_node_modules_bin(self): + node_bin_dir = gateway_cli.PROJECT_ROOT / "node_modules" / ".bin" + if not node_bin_dir.is_dir(): + pytest.skip("node_modules/.bin not present in this checkout") plist = gateway_cli.generate_launchd_plist() - node_bin = str(gateway_cli.PROJECT_ROOT / "node_modules" / ".bin") + node_bin = str(node_bin_dir) lines = plist.splitlines() for i, line in enumerate(lines): if "PATH" in line.strip(): From 164a77dec9b74955c17401e9cf79f5470960b015 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:45:46 +0000 Subject: [PATCH 079/218] docs: add pip install path to installation, quickstart, updating, and CLI reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document pip install hermes-agent as a first-class install option. Clarify that PyPI releases track tagged versions (major/minor), not every commit on main — git installer is for bleeding-edge. 
--- website/docs/getting-started/installation.md | 32 ++++++++++++++++++-- website/docs/getting-started/quickstart.md | 10 +++++- website/docs/getting-started/updating.md | 30 ++++++++++++++++-- website/docs/reference/cli-commands.md | 4 ++- 4 files changed, 69 insertions(+), 7 deletions(-) diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index c8db40a9137..14bd95151c1 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -10,7 +10,30 @@ Get Hermes Agent up and running in under two minutes with the one-line installer ## Quick Install -### Linux / macOS / WSL2 +### pip (recommended for most users) + +```bash +pip install hermes-agent +``` + +This gives you the full Hermes Agent — CLI, web dashboard, and TUI — with zero external dependencies for core usage. Node.js, browser engines, and other optional tools are bootstrapped lazily on first use (e.g. when you run `hermes --tui` or use browser tools). + +PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. If you want bleeding-edge changes as they land, use the git install below. + +After installing, run: + +```bash +hermes setup # interactive wizard — configures your LLM provider and API key +hermes # start chatting +``` + +:::tip +If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster. 
+::: + +### One-Line Installer (Linux / macOS / WSL2) + +For a git-based install that tracks `main` and gives you the latest changes immediately: ```bash curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash @@ -80,7 +103,8 @@ Where the installer puts things depends on whether you're installing as a normal | Installer | Code lives at | `hermes` binary | Data directory | |---|---|---|---| -| Per-user (normal) | `~/.hermes/hermes-agent/` | `~/.local/bin/hermes` (symlink) | `~/.hermes/` | +| pip install | Python site-packages | `~/.local/bin/hermes` (console_scripts) | `~/.hermes/` | +| Per-user (git installer) | `~/.hermes/hermes-agent/` | `~/.local/bin/hermes` (symlink) | `~/.hermes/` | | Root-mode (`sudo curl … \| sudo bash`) | `/usr/local/lib/hermes-agent/` | `/usr/local/bin/hermes` | `/root/.hermes/` (or `$HERMES_HOME`) | The root-mode **FHS layout** (`/usr/local/lib/…`, `/usr/local/bin/hermes`) matches where other system-wide developer tools land on Linux. It's useful for shared-machine deployments where one system install should serve every user. Per-user config (auth, skills, sessions) still lives under each user's `~/.hermes/` or explicit `HERMES_HOME`. @@ -108,7 +132,9 @@ hermes setup # Or run the full setup wizard to configure everything at ## Prerequisites -The only prerequisite is **Git**. The installer automatically handles everything else: +**pip install:** No prerequisites beyond Python 3.11+. Everything else is handled automatically. + +**Git installer:** The only prerequisite is **Git**. 
The installer automatically handles everything else: - **uv** (fast Python package manager) - **Python 3.11** (via uv, no sudo needed) diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index f5a089ee724..341618c614c 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -48,7 +48,15 @@ Pick the row that matches your goal: ## 1. Install Hermes Agent -Run the one-line installer: +**Option A — pip (simplest):** + +```bash +pip install hermes-agent +``` + +PyPI releases track tagged versions (major/minor releases), not every commit on `main`. For bleeding-edge, use Option B. + +**Option B — git installer (tracks main branch):** ```bash # Linux / macOS / WSL2 / Android (Termux) diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index aa2a426db99..83b3eb3221c 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -8,19 +8,36 @@ description: "How to update Hermes Agent to the latest version or uninstall it" ## Updating +### Git installs + Update to the latest version with a single command: ```bash hermes update ``` -This pulls the latest code, updates dependencies, and prompts you to configure any new options that were added since your last update. +This pulls the latest code from `main`, updates dependencies, and prompts you to configure any new options that were added since your last update. + +### pip installs + +PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. 
Check for updates and upgrade with: + +```bash +hermes update --check # see if a newer release is on PyPI +hermes update # runs pip install --upgrade hermes-agent +``` + +Or manually: + +```bash +pip install --upgrade hermes-agent # or: uv pip install --upgrade hermes-agent +``` :::tip `hermes update` automatically detects new configuration options and prompts you to add them. If you skipped that prompt, you can manually run `hermes config check` to see missing options, then `hermes config migrate` to interactively add them. ::: -### What happens during an update +### What happens during an update (git installs) When you run `hermes update`, the following steps occur: @@ -189,12 +206,21 @@ See [Nix Setup](./nix-setup.md) for more details. ## Uninstalling +### Git installs + ```bash hermes uninstall ``` The uninstaller gives you the option to keep your configuration files (`~/.hermes/`) for a future reinstall. +### pip installs + +```bash +pip uninstall hermes-agent +rm -rf ~/.hermes # Optional — keep if you plan to reinstall +``` + ### Manual Uninstall ```bash diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index aa12f431b62..3b5b7d2e925 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -76,7 +76,7 @@ hermes [global-options] [subcommand/options] | `hermes profile` | Manage profiles — multiple isolated Hermes instances. | | `hermes completion` | Print shell completion scripts (bash/zsh/fish). | | `hermes version` | Show version information. | -| `hermes update` | Pull latest code and reinstall dependencies. `--check` prints commit diff without pulling; `--backup` takes a pre-pull `HERMES_HOME` snapshot. | +| `hermes update` | Pull latest code and reinstall dependencies (git installs), or check PyPI and `pip install --upgrade` (pip installs). `--check` previews without installing; `--backup` takes a pre-pull `HERMES_HOME` snapshot. 
| | `hermes uninstall` | Remove Hermes from the system. | ## `hermes chat` @@ -1188,6 +1188,8 @@ hermes update [--check] [--backup] [--restart-gateway] Pulls the latest `hermes-agent` code and reinstalls dependencies in your venv, then re-runs the post-install hooks (MCP servers, skills sync, completion install). Safe to run on a live install. +**pip installs:** `hermes update` detects pip-based installations automatically — it queries PyPI for the latest release and runs `pip install --upgrade hermes-agent` instead of `git pull`. PyPI releases track tagged versions (major/minor releases), not every commit on `main`. Use `--check` to see if a newer PyPI release is available without installing. + | Option | Description | |--------|-------------| | `--check` | Print the current commit and the latest `origin/main` commit side by side, and exit 0 if in sync or 1 if behind. Does not pull, install, or restart anything. | From 47c0efe1c08ba6f0a70d07b7f353e1ad71e69678 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:47:43 +0000 Subject: [PATCH 080/218] refactor: DRY cleanup from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dep_ensure.py: use get_hermes_home() instead of hand-rolled env var - dep_ensure.py: add "chrome" to browser name list (was inconsistent with browser_tool.py) - main.py _cmd_update_check: use detect_install_method() directly instead of redundant .git check - main.py _cmd_update_pip: build command list directly instead of fragile split() on display string - banner.py: rename _check_via_pypi → check_via_pypi (cross-module public API) --- hermes_cli/banner.py | 4 +-- hermes_cli/dep_ensure.py | 6 ++-- hermes_cli/main.py | 39 ++++++++++++---------- tests/hermes_cli/test_banner_pip_update.py | 24 ++++++------- tests/hermes_cli/test_update_check.py | 2 +- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 
077ee41f0a2..ef592beb7fd 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -199,7 +199,7 @@ def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]: return None -def _check_via_pypi() -> Optional[int]: +def check_via_pypi() -> Optional[int]: """Compare installed version against PyPI latest. Returns 0 if up-to-date, 1 if behind, None on failure. @@ -255,7 +255,7 @@ def check_for_updates() -> Optional[int]: if not (repo_dir / ".git").exists(): repo_dir = hermes_home / "hermes-agent" if not (repo_dir / ".git").exists(): - behind = _check_via_pypi() + behind = check_via_pypi() else: behind = _check_via_local_git(repo_dir) diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py index c06fc6db80a..3312726c36d 100644 --- a/hermes_cli/dep_ensure.py +++ b/hermes_cli/dep_ensure.py @@ -41,15 +41,15 @@ _DEP_DESCRIPTIONS = { def _has_system_browser() -> bool: - for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"): + for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser", "chrome"): if shutil.which(name): return True return False def _has_hermes_agent_browser() -> bool: - hermes_home = os.environ.get("HERMES_HOME", str(Path.home() / ".hermes")) - return (Path(hermes_home) / "node_modules" / ".bin" / "agent-browser").is_file() + from hermes_constants import get_hermes_home + return (get_hermes_home() / "node_modules" / ".bin" / "agent-browser").is_file() def _find_install_script( diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 121b77b0f91..41c4a23f932 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7419,21 +7419,24 @@ def _finalize_update_output(state): def _cmd_update_check(): """Implement ``hermes update --check``: fetch and report without installing.""" + from hermes_cli.config import detect_install_method + method = detect_install_method(PROJECT_ROOT) + if method == "pip": + from hermes_cli.config import recommended_update_command + from 
hermes_cli.banner import check_via_pypi + result = check_via_pypi() + if result is None: + print("✗ Could not reach PyPI to check for updates.") + sys.exit(1) + elif result == 0: + print("✓ Already up to date.") + else: + print("⚕ Update available on PyPI.") + print(f" Run '{recommended_update_command()}' to install.") + return + git_dir = PROJECT_ROOT / ".git" if not git_dir.exists(): - from hermes_cli.config import detect_install_method, recommended_update_command - if detect_install_method(PROJECT_ROOT) == "pip": - from hermes_cli.banner import _check_via_pypi - result = _check_via_pypi() - if result is None: - print("✗ Could not reach PyPI to check for updates.") - sys.exit(1) - elif result == 0: - print("✓ Already up to date.") - else: - print(f"⚕ Update available on PyPI.") - print(f" Run '{recommended_update_command()}' to install.") - return print("✗ Not a git repository — cannot check for updates.") sys.exit(1) @@ -7712,15 +7715,15 @@ def cmd_update(args): def _cmd_update_pip(args): """Update Hermes via pip (for PyPI installs).""" from hermes_cli import __version__ - from hermes_cli.config import recommended_update_command_for_method print(f"→ Current version: {__version__}") print("→ Checking PyPI for updates...") - cmd_str = recommended_update_command_for_method("pip") - cmd = cmd_str.split() - if cmd[0] == "pip": - cmd = [sys.executable, "-m", "pip"] + cmd[1:] + uv = shutil.which("uv") + if uv: + cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"] + else: + cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"] print(f"→ Running: {' '.join(cmd)}") result = subprocess.run(cmd) diff --git a/tests/hermes_cli/test_banner_pip_update.py b/tests/hermes_cli/test_banner_pip_update.py index a0e9266f698..205c97488a9 100644 --- a/tests/hermes_cli/test_banner_pip_update.py +++ b/tests/hermes_cli/test_banner_pip_update.py @@ -1,29 +1,29 @@ from unittest.mock import patch -def test_check_via_pypi_detects_update(): - """_check_via_pypi 
returns 1 when PyPI has newer version.""" - from hermes_cli.banner import _check_via_pypi +def testcheck_via_pypi_detects_update(): + """check_via_pypi returns 1 when PyPI has newer version.""" + from hermes_cli.banner import check_via_pypi with patch("hermes_cli.banner.VERSION", "0.12.0"): with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"): - result = _check_via_pypi() + result = check_via_pypi() assert result == 1 -def test_check_via_pypi_up_to_date(): - """_check_via_pypi returns 0 when versions match.""" - from hermes_cli.banner import _check_via_pypi +def testcheck_via_pypi_up_to_date(): + """check_via_pypi returns 0 when versions match.""" + from hermes_cli.banner import check_via_pypi with patch("hermes_cli.banner.VERSION", "0.13.0"): with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"): - result = _check_via_pypi() + result = check_via_pypi() assert result == 0 -def test_check_via_pypi_network_failure(): - """_check_via_pypi returns None on network error.""" - from hermes_cli.banner import _check_via_pypi +def testcheck_via_pypi_network_failure(): + """check_via_pypi returns None on network error.""" + from hermes_cli.banner import check_via_pypi with patch("hermes_cli.banner._fetch_pypi_latest", return_value=None): - result = _check_via_pypi() + result = check_via_pypi() assert result is None diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py index 92cd2d2e14c..8a68d6a178d 100644 --- a/tests/hermes_cli/test_update_check.py +++ b/tests/hermes_cli/test_update_check.py @@ -70,7 +70,7 @@ def test_check_for_updates_no_git_dir(tmp_path, monkeypatch): monkeypatch.setattr(banner, "__file__", str(fake_banner)) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) with patch("hermes_cli.banner.subprocess.run") as mock_run: - with patch("hermes_cli.banner._check_via_pypi", return_value=0): + with patch("hermes_cli.banner.check_via_pypi", return_value=0): result = banner.check_for_updates() 
assert result == 0 mock_run.assert_not_called() From a480d345e63b114e9de1e9ceed746b7b9e21f0cb Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Fri, 15 May 2026 13:49:58 +0000 Subject: [PATCH 081/218] docs: add hermes postinstall to installation + quickstart, fix update --check description - installation.md: add tip about `hermes postinstall` for upfront dep install - quickstart.md: show `hermes postinstall` in pip install flow - updating.md: fix --check description to mention PyPI path for pip installs --- website/docs/getting-started/installation.md | 4 ++++ website/docs/getting-started/quickstart.md | 1 + website/docs/getting-started/updating.md | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index 14bd95151c1..a88f4c8bd1c 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -27,6 +27,10 @@ hermes setup # interactive wizard — configures your LLM provider and API key hermes # start chatting ``` +:::tip Optional: install everything upfront +`hermes postinstall` installs Node.js, browser engines, ripgrep, and ffmpeg in one shot — then runs the setup wizard if no provider is configured yet. Use this if you want the full experience (TUI, browser tools, voice) without waiting for lazy installs on first use. +::: + :::tip If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster. ::: diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 341618c614c..80eaf3589ca 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -52,6 +52,7 @@ Pick the row that matches your goal: ```bash pip install hermes-agent +hermes postinstall # optional: installs Node.js, browser, ripgrep, ffmpeg + runs setup if needed ``` PyPI releases track tagged versions (major/minor releases), not every commit on `main`.
For bleeding-edge, use Option B. diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index 83b3eb3221c..d4ced41a4d7 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -49,7 +49,7 @@ When you run `hermes update`, the following steps occur: ### Preview-only: `hermes update --check` -Want to know if you're behind `origin/main` before actually pulling? Run `hermes update --check` — it fetches, prints your local commit and the latest remote commit side-by-side, and exits `0` if in sync or `1` if behind. No files are modified, no gateway is restarted. Useful in scripts and cron jobs that gate on "is there an update". +Want to know if an update is available before pulling? Run `hermes update --check` — for git installs it fetches and compares commits against `origin/main`; for pip installs it queries PyPI for the latest release. No files are modified, no gateway is restarted. Useful in scripts and cron jobs that gate on "is there an update". ### Full pre-update backup: `--backup` From 233d4170cf7b6421939d4ae2d7adc8f3466c347f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:45:59 -0700 Subject: [PATCH 082/218] docs(xai): link OAuth-over-SSH guide from xAI provider surfaces (#26610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #26592. The new docs/guides/oauth-over-ssh.md page was linked from the two SSH-specific sections of the xAI Grok OAuth guide but was missing from the surfaces a user is more likely to hit first: - guides/xai-grok-oauth.md 'See Also' — add the SSH guide at the top with a short qualifier so remote users notice it before clicking through. - integrations/providers.md xAI Grok OAuth callout — append the SSH guide link alongside the existing xAI OAuth guide link. - user-guide/configuration.md xai-oauth tip — same. 
Docs build: zero warnings on touched files. --- website/docs/guides/xai-grok-oauth.md | 1 + website/docs/integrations/providers.md | 2 +- website/docs/user-guide/configuration.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md index 95167a2430c..67d31c929ad 100644 --- a/website/docs/guides/xai-grok-oauth.md +++ b/website/docs/guides/xai-grok-oauth.md @@ -221,6 +221,7 @@ This clears both the singleton OAuth entry in `auth.json` and any credential-poo ## See Also +- [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md) — required reading if Hermes is on a different machine than your browser - [AI Providers reference](../integrations/providers.md) - [Environment Variables](../reference/environment-variables.md) - [Configuration](../user-guide/configuration.md) diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index e7b2e5ab86d..248d17c5fac 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -331,7 +331,7 @@ When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoin xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter needed, the server reasons by default. Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or drop `grok` as a shortcut into `/model grok-4-1-fast-reasoning`. -SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow. 
+SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow — and if Hermes runs on a remote host, also see [OAuth over SSH / Remote Hosts](../guides/oauth-over-ssh.md) for the required `ssh -L` tunnel. When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history. diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index d529c8af687..77e5d74ad42 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -820,7 +820,7 @@ Available providers for auxiliary tasks: `auto`, `main`, plus any provider in th ::: :::tip xAI Grok OAuth -`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md). +`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). 
See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md), and if Hermes is on a remote host see [OAuth over SSH / Remote Hosts](../guides/oauth-over-ssh.md). ::: :::warning `"main"` is for auxiliary tasks only From 887ba1fb03d78f8922b32e7d17dfb1e0998d9315 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:47:30 -0700 Subject: [PATCH 083/218] ci: reject PRs with no common ancestor on main (#26611) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catches the failure mode that produced #25045: a contributor PR whose branch had been disconnected from main's history (likely an accidental 'git checkout --orphan' or '.git/' re-init). GitHub's merge UI does not refuse merges of unrelated histories, so the PR landed cleanly with its intended one-file change but its parent-less root commit (413990c94) got grafted into main as a second root. The merge resolution itself was correct — main's content won for every conflicting file — but ~1500 files' worth of git blame collapsed onto that single commit. Implementation: 'git merge-base origin/main HEAD' exits non-zero and prints nothing when the two commits share no ancestor. Check both conditions and fail with a clear message + recovery steps. Verified: against the historic state of PR #25045 (base 5d90386ba, head 1149e75db), 'git merge-base' returns empty with exit 1, so the new check would have rejected it. --- .github/workflows/history-check.yml | 58 +++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/workflows/history-check.yml diff --git a/.github/workflows/history-check.yml b/.github/workflows/history-check.yml new file mode 100644 index 00000000000..bd66f19404e --- /dev/null +++ b/.github/workflows/history-check.yml @@ -0,0 +1,58 @@ +name: History Check + +# Rejects PRs whose branch has no common ancestor with main. 
+# +# In May 2026 PR #25045 was merged from a branch that had been disconnected +# from main's history (likely an accidental `git checkout --orphan` or +# `.git/` re-init). GitHub's merge UI does not refuse merges of unrelated +# histories, so the PR landed cleanly with the intended one-file change — +# but its parent-less root commit (413990c94) got grafted into main as a +# second root, and ~1500 files' worth of `git blame` history collapsed +# onto that single commit. +# +# This check catches the failure mode by requiring `git merge-base` between +# the PR head and main to be non-empty. + +on: + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + check-common-ancestor: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 # full history both sides for merge-base + + - name: Reject PRs with no common ancestor on main + run: | + # `git merge-base` exits non-zero AND prints nothing when the two + # commits share no ancestor. We check both conditions explicitly + # so the failure message is clear regardless of which signal fires + # first. + if ! BASE=$(git merge-base origin/main HEAD 2>/dev/null) || [ -z "$BASE" ]; then + echo "" + echo "::error::This PR has no common ancestor with main." + echo "" + echo "Your branch's history is disconnected from main. Common causes:" + echo " - the branch was created with 'git checkout --orphan'" + echo " - '.git/' was re-initialized at some point during the work" + echo " - the branch was force-pushed from an unrelated repository" + echo "" + echo "Merging an unrelated-history PR grafts a parent-less root commit" + echo "into main and collapses git blame for every file in that snapshot." + echo "Reference: PR #25045 caused this and re-rooted blame on ~1500" + echo "files to a single orphan commit." 
+ echo "" + echo "To fix, rebase your changes onto current main:" + echo " git fetch origin main" + echo " git checkout -b fix-branch origin/main" + echo " # re-apply your changes (cherry-pick, copy files, etc.)" + echo " git push -f origin fix-branch" + exit 1 + fi + echo "::notice::Common ancestor with main: $BASE" From 42070ecefb9e9da3adec6d536d130d9dc3b82560 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:58:23 -0700 Subject: [PATCH 084/218] feat(skills/notion): overhaul for Notion Developer Platform (May 2026) (#26612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(skills/notion): overhaul for Notion Developer Platform (May 2026) Notion shipped its Developer Platform on May 13, 2026: ntn CLI, Workers, Markdown API, bidirectional webhooks, agent tools. The existing skill only covered curl + integration token CRUD, so it didn't surface any of the new ergonomics — particularly the /markdown endpoints (much easier for agents to consume) and the ntn CLI for headless API + Workers management. This rewrite (v1.0.0 -> v2.0.0): - Splits setup into Path A (HTTP, cross-platform incl. Windows), Path B (ntn CLI on macOS/Linux, with NOTION_API_TOKEN env var for headless), and Path C (Windows fallback — HTTP API or WSL2; native ntn is 'coming soon'). - Keeps the full curl reference (still the only Windows-compatible path). - Adds /markdown endpoints — GET and PATCH page-as-markdown, plus POST /v1/pages with a markdown body param. Agent-friendly, no CLI required. - Adds ntn CLI cheat sheet for raw API shorthand, file uploads, and workspace flags. - Adds Notion Workers section: scaffold, tool/webhook capability shapes, lifecycle commands. Gated on Business/Enterprise plans + macOS/Linux. - Adds Notion-flavored Markdown reference (callouts, toggles, columns, mentions, colors) for the /markdown endpoints. - Adds a 'choose the right path' decision table at the bottom. 
- Notes the new efficient Notion MCP server as an optional wiring path. Auto-generated docs page regenerated via website/scripts/generate-skill-docs.py. * docs(skills-catalog): update notion description for v2.0.0 --- skills/productivity/notion/SKILL.md | 356 ++++++++++++++++-- website/docs/reference/skills-catalog.md | 2 +- .../productivity/productivity-notion.md | 354 +++++++++++++++-- 3 files changed, 632 insertions(+), 80 deletions(-) diff --git a/skills/productivity/notion/SKILL.md b/skills/productivity/notion/SKILL.md index b645c088f28..83222ffd938 100644 --- a/skills/productivity/notion/SKILL.md +++ b/skills/productivity/notion/SKILL.md @@ -1,35 +1,158 @@ --- name: notion -description: "Notion API via curl: pages, databases, blocks, search." -version: 1.0.0 +description: "Notion API + ntn CLI: pages, databases, markdown, Workers." +version: 2.0.0 author: community license: MIT platforms: [linux, macos, windows] -metadata: - hermes: - tags: [Notion, Productivity, Notes, Database, API] - homepage: https://developers.notion.com prerequisites: env_vars: [NOTION_API_KEY] +metadata: + hermes: + tags: [Notion, Productivity, Notes, Database, API, CLI, Workers] + homepage: https://developers.notion.com --- -# Notion API +# Notion -Use the Notion API via curl to create, read, update pages, databases (data sources), and blocks. No extra tools needed — just curl and a Notion API key. +Talk to Notion two ways. Same integration token works for both — pick by what's available. -## Prerequisites +◆ **`ntn` CLI** — Notion's official CLI. Shorter syntax, one-line file uploads, required for Workers. macOS + Linux only as of May 2026 (Windows support "coming soon"). **Default when installed.** +◆ **HTTP + curl** — works everywhere including Windows. **Default fallback** when `ntn` isn't installed. + +## Setup + +### 1. Get an integration token (required for both paths) 1. Create an integration at https://notion.so/my-integrations 2. 
Copy the API key (starts with `ntn_` or `secret_`) -3. Store it in `~/.hermes/.env`: +3. Store in `~/.hermes/.env`: ``` NOTION_API_KEY=ntn_your_key_here ``` -4. **Important:** Share target pages/databases with your integration in Notion (click "..." → "Connect to" → your integration name) +4. **Share target pages/databases with the integration** in Notion: page menu `...` → `Connect to` → your integration name. Without this, the API returns 404 for that page even though it exists. + +### 2. Install `ntn` (preferred path on macOS / Linux) + +```bash +# Recommended +curl -fsSL https://ntn.dev | bash + +# Or via npm (needs Node 22+, npm 10+) +npm install --global ntn + +ntn --version # verify +``` + +**Skip `ntn login` — use the integration token instead.** This works headlessly, no browser needed: +```bash +export NOTION_API_TOKEN=$NOTION_API_KEY # ntn reads NOTION_API_TOKEN +export NOTION_KEYRING=0 # don't try to use the OS keychain +``` + +Add those exports to your shell profile (or to `~/.hermes/.env`) so every session inherits them. + +### 3. Choose path at runtime + +```bash +if command -v ntn >/dev/null 2>&1; then + # use ntn +else + # fall back to curl +fi +``` + +Windows users: skip step 2 entirely until native `ntn` ships — Path B works fine. If you want CLI ergonomics now, install `ntn` inside WSL2. ## API Basics -All requests use this pattern: +`Notion-Version: 2025-09-03` is required on all HTTP requests. `ntn` handles this for you. In this version, what users call "databases" are called **data sources** in the API. 
+ +## Path A — `ntn` CLI (preferred, macOS / Linux) + +### Raw API calls (shorthand for curl) +```bash +ntn api v1/users # GET +ntn api v1/pages parent[page_id]=abc123 \ + properties[title][0][text][content]="Notes" # POST with inline body +ntn api v1/pages/abc123 -X PATCH archived:=true # PATCH; := is non-string (bool/num/null) +``` + +Syntax notes: +- `key=value` — string fields +- `key[nested]=value` — nested object fields +- `key:=value` — typed assignment (booleans, numbers, null, arrays) + +### Search +```bash +ntn api v1/search query="page title" +``` + +### Read page metadata +```bash +ntn api v1/pages/{page_id} +``` + +### Read page as Markdown (agent-friendly) +```bash +ntn api v1/pages/{page_id}/markdown +``` + +### Read page content as blocks +```bash +ntn api v1/blocks/{page_id}/children +``` + +### Create page from Markdown +```bash +ntn api v1/pages \ + parent[page_id]=xxx \ + properties[title][0][text][content]="Notes from meeting" \ + markdown="# Agenda + +- Q3 roadmap +- Hiring" +``` + +### Patch a page with Markdown +```bash +ntn api v1/pages/{page_id}/markdown -X PATCH \ + markdown="## Update + +Shipped the prototype." +``` + +### Query a database (data source) +```bash +ntn api v1/data_sources/{data_source_id}/query -X POST \ + filter[property]=Status filter[select][equals]=Active +``` + +For complex queries with `sorts`, multiple filter clauses, or compound logic, pipe JSON in: +```bash +echo '{"filter": {"property": "Status", "select": {"equals": "Active"}}, "sorts": [{"property": "Date", "direction": "descending"}]}' | \ + ntn api v1/data_sources/{data_source_id}/query -X POST --json - +``` + +### File uploads (one-liner — biggest CLI win) +```bash +ntn files create < photo.png +ntn files create --external-url https://example.com/photo.png +ntn files list +``` + +Compare to the 3-step HTTP flow (create upload → PUT bytes → reference).
+ +### Useful env vars +| Var | Effect | +|---|---| +| `NOTION_API_TOKEN` | Auth token (overrides keychain) — set this to your integration token | +| `NOTION_KEYRING=0` | File-based creds at `~/.config/notion/auth.json` instead of OS keychain | +| `NOTION_WORKSPACE_ID` | Skip the workspace picker prompt | + +## Path B — HTTP + curl (cross-platform, default on Windows) + +All requests share this pattern: ```bash curl -s -X GET "https://api.notion.com/v1/..." \ @@ -38,12 +161,9 @@ curl -s -X GET "https://api.notion.com/v1/..." \ -H "Content-Type: application/json" ``` -The `Notion-Version` header is required. This skill uses `2025-09-03` (latest). In this version, databases are called "data sources" in the API. - -## Common Operations +On Windows the `curl` shipped with Windows 10+ works as-is. PowerShell users can also use `Invoke-RestMethod`. ### Search - ```bash curl -s -X POST "https://api.notion.com/v1/search" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -52,24 +172,56 @@ curl -s -X POST "https://api.notion.com/v1/search" \ -d '{"query": "page title"}' ``` -### Get Page - +### Read page metadata ```bash curl -s "https://api.notion.com/v1/pages/{page_id}" \ -H "Authorization: Bearer $NOTION_API_KEY" \ -H "Notion-Version: 2025-09-03" ``` -### Get Page Content (blocks) +### Read page as Markdown (agent-friendly) +Easier to feed to a model than block JSON. + +```bash +curl -s "https://api.notion.com/v1/pages/{page_id}/markdown" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" +``` + +### Read page content as blocks (when you need structure) ```bash curl -s "https://api.notion.com/v1/blocks/{page_id}/children" \ -H "Authorization: Bearer $NOTION_API_KEY" \ -H "Notion-Version: 2025-09-03" ``` -### Create Page in a Database +### Create page from Markdown +`POST /v1/pages` accepts a `markdown` body param. 
+ +```bash +curl -s -X POST "https://api.notion.com/v1/pages" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{ + "parent": {"page_id": "xxx"}, + "properties": {"title": [{"text": {"content": "Notes from meeting"}}]}, + "markdown": "# Agenda\n\n- Q3 roadmap\n- Hiring\n\n## Decisions\n- Ship MVP Friday" + }' +``` + +### Patch a page with Markdown +```bash +curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}/markdown" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{"markdown": "## Update\n\nShipped the prototype."}' +``` + +### Create page in a database (typed properties) ```bash curl -s -X POST "https://api.notion.com/v1/pages" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -84,8 +236,7 @@ curl -s -X POST "https://api.notion.com/v1/pages" \ }' ``` -### Query a Database - +### Query a database (data source) ```bash curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -97,8 +248,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" }' ``` -### Create a Database - +### Create a database ```bash curl -s -X POST "https://api.notion.com/v1/data_sources" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -115,8 +265,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources" \ }' ``` -### Update Page Properties - +### Update page properties ```bash curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -125,8 +274,7 @@ curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \ -d '{"properties": {"Status": {"select": {"name": "Done"}}}}' ``` -### Add Content to a Page - +### Append blocks to a page ```bash curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -139,6 
+287,21 @@ curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \ }' ``` +### File uploads (3-step flow) +```bash +# 1. Create upload +curl -s -X POST "https://api.notion.com/v1/file_uploads" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{"filename": "photo.png", "content_type": "image/png"}' + +# 2. PUT bytes to the upload_url returned above +curl -s -X PUT "{upload_url}" --data-binary @photo.png + +# 3. Reference {file_upload_id} in a page/block payload +``` + ## Property Types Common property formats for database items: @@ -154,19 +317,132 @@ Common property formats for database items: - **Email:** `{"email": "user@example.com"}` - **Relation:** `{"relation": [{"id": "page_id"}]}` -## Key Differences in API Version 2025-09-03 +## API Version 2025-09-03 — Databases vs Data Sources -- **Databases → Data Sources:** Use `/data_sources/` endpoints for queries and retrieval -- **Two IDs:** Each database has both a `database_id` and a `data_source_id` - - Use `database_id` when creating pages (`parent: {"database_id": "..."}`) - - Use `data_source_id` when querying (`POST /v1/data_sources/{id}/query`) -- **Search results:** Databases return as `"object": "data_source"` with their `data_source_id` +- **Databases became data sources.** Use `/data_sources/` endpoints for queries and retrieval. +- **Two IDs per database:** `database_id` and `data_source_id`. + - `database_id` when creating pages: `parent: {"database_id": "..."}` + - `data_source_id` when querying: `POST /v1/data_sources/{id}/query` +- Search returns databases as `"object": "data_source"` with the `data_source_id` field. + +## Notion Workers (advanced, requires `ntn`) + +Workers are TypeScript programs Notion hosts for you. One worker can expose any combination of: +- **Syncs** — pull data from external APIs into a Notion database on a schedule (default 30 min). 
+- **Tools** — appear as callable tools inside Notion's Custom Agents. +- **Webhooks** — receive HTTP events from external services (GitHub, Stripe, etc.) and act in Notion. + +**Plan / platform gating:** +- CLI works on all plans. **Deploying Workers requires Business or Enterprise.** +- `ntn` is macOS/Linux only as of May 2026. Windows users need WSL2 or to wait for native support. +- Free through August 11, 2026; metered on Notion credits after. + +### Minimal Worker + +```bash +ntn workers new my-worker # scaffold +cd my-worker +# Edit src/index.ts +ntn workers deploy --name my-worker +``` + +`src/index.ts`: +```typescript +import { Worker } from "@notionhq/workers"; + +const worker = new Worker(); +export default worker; + +worker.tool("greet", { + title: "Greet a User", + description: "Returns a friendly greeting", + inputSchema: { type: "object", properties: { name: { type: "string" } }, required: ["name"] }, + execute: async ({ name }) => `Hello, ${name}!`, +}); +``` + +### Webhook capability + +```typescript +worker.webhook("onGithubPush", { + title: "GitHub Push Handler", + execute: async (events, { notion }) => { + for (const event of events) { + // event.body, event.rawBody (for signature verification), event.headers + console.log("got delivery", event.deliveryId); + } + }, +}); +``` + +After deploy: `ntn workers webhooks list` shows the URL Notion generates. Treat that URL as a secret — anyone with it can POST events unless you add signature verification. + +### Worker lifecycle commands + +```bash +ntn workers deploy +ntn workers list +ntn workers exec -d '{"name": "world"}' +ntn workers sync trigger # run a sync now +ntn workers sync pause +ntn workers env set GITHUB_WEBHOOK_SECRET=... +ntn workers runs list # recent invocations +ntn workers runs logs +ntn workers webhooks list +``` + +When asked to build a Worker, scaffold with `ntn workers new`, write the code in `src/index.ts`, set any secrets with `ntn workers env set`, and deploy. 
Notion's docs at https://developers.notion.com/workers cover the full API surface. + +## Notion-Flavored Markdown (used by `/markdown` endpoints) + +Standard CommonMark plus XML-like tags for Notion-specific blocks. Use **tabs** for indentation. + +**Blocks beyond CommonMark:** +``` +<callout icon="💡" color="blue_bg"> + Ship the MVP by **Friday**. +</callout> + +<details>
+<summary>Toggle title</summary> + Children indented one tab +</details>
+ +<columns> + <column>Left side</column> + <column>Right side</column> +</columns> + +<table_of_contents/> +``` + +**Inline:** +- Mentions: `<mention-user url="..."/>`, `<mention-page url="...">Title</mention-page>`, `<mention-date start="2026-05-15"/>` +- Underline: `<span underline="true">text</span>` +- Color: `<span color="blue">text</span>` or block-level `{color="blue"}` on the first line +- Math: inline `$x^2$`, block `$$ ... $$` +- Citations: `[^https://example.com]` + +**Colors:** `gray brown orange yellow green blue purple pink red`, plus `*_bg` variants for backgrounds. + +Headings 5/6 collapse to H4. Multiple `>` lines render as separate quote blocks — use `<br>
` inside a single `>` for multi-line quotes. + +## Choosing the Right Path + +| Task | mac / Linux | Windows | +|---|---|---| +| Read/write pages, search, query databases | `ntn api ...` | curl | +| Read a page for an agent to summarize | `ntn api v1/pages/{id}/markdown` | curl `/markdown` endpoint | +| Upload a file | `ntn files create < file` | 3-step HTTP flow | +| One-off API exploration | `ntn api ...` | curl | +| Build a sync / webhook / agent tool hosted by Notion | `ntn workers ...` | WSL2 + `ntn workers ...` | ## Notes -- Page/database IDs are UUIDs (with or without dashes) -- Rate limit: ~3 requests/second average -- The API cannot set database view filters — that's UI-only -- Use `is_inline: true` when creating data sources to embed them in pages -- Add `-s` flag to curl to suppress progress bars (cleaner output for Hermes) -- Pipe output through `jq` for readable JSON: `... | jq '.results[0].properties'` +- Page/database IDs are UUIDs (with or without dashes — both accepted). +- Rate limit: ~3 requests/second average. The CLI doesn't bypass this. +- The API cannot set database **view** filters — that's UI-only. +- Use `"is_inline": true` when creating data sources to embed them in a page. +- Always pass `-s` to curl to suppress progress bars (cleaner agent output). +- Pipe JSON through `jq` when reading: `... | jq '.results[0].properties'`. +- Notion also ships an MCP server now (`Notion MCP`, ~91% more token-efficient on DB ops than the previous version) — wire it via Hermes' MCP support if you want streaming Notion access from inside a session, but the paths above are enough for most one-shot tasks. 
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 8adeb3dcf76..c5b205f521d 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -144,7 +144,7 @@ If a skill is missing from this list but present in the repo, the catalog is reg | [`linear`](/docs/user-guide/skills/bundled/productivity/productivity-linear) | Linear: manage issues, projects, teams via GraphQL + curl. | `productivity/linear` | | [`maps`](/docs/user-guide/skills/bundled/productivity/productivity-maps) | Geocode, POIs, routes, timezones via OpenStreetMap/OSRM. | `productivity/maps` | | [`nano-pdf`](/docs/user-guide/skills/bundled/productivity/productivity-nano-pdf) | Edit PDF text/typos/titles via nano-pdf CLI (NL prompts). | `productivity/nano-pdf` | -| [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) | Notion API via curl: pages, databases, blocks, search. | `productivity/notion` | +| [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) | Notion API + ntn CLI: pages, databases, markdown, Workers. | `productivity/notion` | | [`ocr-and-documents`](/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) | Extract text from PDFs/scans (pymupdf, marker-pdf). | `productivity/ocr-and-documents` | | [`powerpoint`](/docs/user-guide/skills/bundled/productivity/productivity-powerpoint) | Create, read, edit .pptx decks, slides, notes, templates. | `productivity/powerpoint` | | [`teams-meeting-pipeline`](/docs/user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline) | Operate the Teams meeting summary pipeline via Hermes CLI — summarize meetings, inspect pipeline status, replay jobs, manage Microsoft Graph subscriptions. 
| `productivity/teams-meeting-pipeline` | diff --git a/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md b/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md index 7e8fab2f2ba..80487d6b88f 100644 --- a/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md +++ b/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md @@ -1,14 +1,14 @@ --- -title: "Notion — Notion API via curl: pages, databases, blocks, search" +title: "Notion — Notion API + ntn CLI: pages, databases, markdown, Workers" sidebar_label: "Notion" -description: "Notion API via curl: pages, databases, blocks, search" +description: "Notion API + ntn CLI: pages, databases, markdown, Workers" --- {/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} # Notion -Notion API via curl: pages, databases, blocks, search. +Notion API + ntn CLI: pages, databases, markdown, Workers. ## Skill metadata @@ -16,11 +16,11 @@ Notion API via curl: pages, databases, blocks, search. |---|---| | Source | Bundled (installed by default) | | Path | `skills/productivity/notion` | -| Version | `1.0.0` | +| Version | `2.0.0` | | Author | community | | License | MIT | | Platforms | linux, macos, windows | -| Tags | `Notion`, `Productivity`, `Notes`, `Database`, `API` | +| Tags | `Notion`, `Productivity`, `Notes`, `Database`, `API`, `CLI`, `Workers` | ## Reference: full SKILL.md @@ -28,23 +28,146 @@ Notion API via curl: pages, databases, blocks, search. The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. ::: -# Notion API +# Notion -Use the Notion API via curl to create, read, update pages, databases (data sources), and blocks. No extra tools needed — just curl and a Notion API key. +Talk to Notion two ways. 
Same integration token works for both — pick by what's available. -## Prerequisites +◆ **`ntn` CLI** — Notion's official CLI. Shorter syntax, one-line file uploads, required for Workers. macOS + Linux only as of May 2026 (Windows support "coming soon"). **Default when installed.** +◆ **HTTP + curl** — works everywhere including Windows. **Default fallback** when `ntn` isn't installed. + +## Setup + +### 1. Get an integration token (required for both paths) 1. Create an integration at https://notion.so/my-integrations 2. Copy the API key (starts with `ntn_` or `secret_`) -3. Store it in `~/.hermes/.env`: +3. Store in `~/.hermes/.env`: ``` NOTION_API_KEY=ntn_your_key_here ``` -4. **Important:** Share target pages/databases with your integration in Notion (click "..." → "Connect to" → your integration name) +4. **Share target pages/databases with the integration** in Notion: page menu `...` → `Connect to` → your integration name. Without this, the API returns 404 for that page even though it exists. + +### 2. Install `ntn` (preferred path on macOS / Linux) + +```bash +# Recommended +curl -fsSL https://ntn.dev | bash + +# Or via npm (needs Node 22+, npm 10+) +npm install --global ntn + +ntn --version # verify +``` + +**Skip `ntn login` — use the integration token instead.** This works headlessly, no browser needed: +```bash +export NOTION_API_TOKEN=$NOTION_API_KEY # ntn reads NOTION_API_TOKEN +export NOTION_KEYRING=0 # don't try to use the OS keychain +``` + +Add those exports to your shell profile (or to `~/.hermes/.env`) so every session inherits them. + +### 3. Choose path at runtime + +```bash +if command -v ntn >/dev/null 2>&1; then + # use ntn +else + # fall back to curl +fi +``` + +Windows users: skip step 2 entirely until native `ntn` ships — Path B works fine. If you want CLI ergonomics now, install `ntn` inside WSL2. ## API Basics -All requests use this pattern: +`Notion-Version: 2025-09-03` is required on all HTTP requests. `ntn` handles this for you. 
In this version, what users call "databases" are called **data sources** in the API. + +## Path A — `ntn` CLI (preferred, macOS / Linux) + +### Raw API calls (shorthand for curl) +```bash +ntn api v1/users # GET +ntn api v1/pages parent[page_id]=abc123 \ + properties[title][0][text][content]="Notes" # POST with inline body +ntn api v1/pages/abc123 -X PATCH archived:=true # PATCH; := is non-string (bool/num/null) +``` + +Syntax notes: +- `key=value` — string fields +- `key[nested]=value` — nested object fields +- `key:=value` — typed assignment (booleans, numbers, null, arrays) + +### Search +```bash +ntn api v1/search query="page title" +``` + +### Read page metadata +```bash +ntn api v1/pages/{page_id} +``` + +### Read page as Markdown (agent-friendly) +```bash +ntn api v1/pages/{page_id}/markdown +``` + +### Read page content as blocks +```bash +ntn api v1/blocks/{page_id}/children +``` + +### Create page from Markdown +```bash +ntn api v1/pages \ + parent[page_id]=xxx \ + properties[title][0][text][content]="Notes from meeting" \ + markdown="# Agenda + +- Q3 roadmap +- Hiring" +``` + +### Patch a page with Markdown +```bash +ntn api v1/pages/{page_id}/markdown -X PATCH \ + markdown="## Update + +Shipped the prototype." +``` + +### Query a database (data source) +```bash +ntn api v1/data_sources/{data_source_id}/query -X POST \ + filter[property]=Status filter[select][equals]=Active +``` + +For complex queries with `sorts`, multiple filter clauses, or compound logic, pipe JSON in: +```bash +echo '{"filter": {"property": "Status", "select": {"equals": "Active"}}, "sorts": [{"property": "Date", "direction": "descending"}]}' | \ + ntn api v1/data_sources/{data_source_id}/query -X POST --json - +``` + +### File uploads (one-liner — biggest CLI win) +```bash +ntn files create < photo.png +ntn files create --external-url https://example.com/photo.png +ntn files list +``` + +Compare to the 3-step HTTP flow (create upload → PUT bytes → reference).
+ +### Useful env vars +| Var | Effect | +|---|---| +| `NOTION_API_TOKEN` | Auth token (overrides keychain) — set this to your integration token | +| `NOTION_KEYRING=0` | File-based creds at `~/.config/notion/auth.json` instead of OS keychain | +| `NOTION_WORKSPACE_ID` | Skip the workspace picker prompt | + +## Path B — HTTP + curl (cross-platform, default on Windows) + +All requests share this pattern: ```bash curl -s -X GET "https://api.notion.com/v1/..." \ @@ -53,12 +176,9 @@ curl -s -X GET "https://api.notion.com/v1/..." \ -H "Content-Type: application/json" ``` -The `Notion-Version` header is required. This skill uses `2025-09-03` (latest). In this version, databases are called "data sources" in the API. - -## Common Operations +On Windows the `curl` shipped with Windows 10+ works as-is. PowerShell users can also use `Invoke-RestMethod`. ### Search - ```bash curl -s -X POST "https://api.notion.com/v1/search" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -67,24 +187,56 @@ curl -s -X POST "https://api.notion.com/v1/search" \ -d '{"query": "page title"}' ``` -### Get Page - +### Read page metadata ```bash curl -s "https://api.notion.com/v1/pages/{page_id}" \ -H "Authorization: Bearer $NOTION_API_KEY" \ -H "Notion-Version: 2025-09-03" ``` -### Get Page Content (blocks) +### Read page as Markdown (agent-friendly) +Easier to feed to a model than block JSON. + +```bash +curl -s "https://api.notion.com/v1/pages/{page_id}/markdown" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" +``` + +### Read page content as blocks (when you need structure) ```bash curl -s "https://api.notion.com/v1/blocks/{page_id}/children" \ -H "Authorization: Bearer $NOTION_API_KEY" \ -H "Notion-Version: 2025-09-03" ``` -### Create Page in a Database +### Create page from Markdown +`POST /v1/pages` accepts a `markdown` body param. 
+ +```bash +curl -s -X POST "https://api.notion.com/v1/pages" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{ + "parent": {"page_id": "xxx"}, + "properties": {"title": [{"text": {"content": "Notes from meeting"}}]}, + "markdown": "# Agenda\n\n- Q3 roadmap\n- Hiring\n\n## Decisions\n- Ship MVP Friday" + }' +``` + +### Patch a page with Markdown +```bash +curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}/markdown" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{"markdown": "## Update\n\nShipped the prototype."}' +``` + +### Create page in a database (typed properties) ```bash curl -s -X POST "https://api.notion.com/v1/pages" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -99,8 +251,7 @@ curl -s -X POST "https://api.notion.com/v1/pages" \ }' ``` -### Query a Database - +### Query a database (data source) ```bash curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -112,8 +263,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" }' ``` -### Create a Database - +### Create a database ```bash curl -s -X POST "https://api.notion.com/v1/data_sources" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -130,8 +280,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources" \ }' ``` -### Update Page Properties - +### Update page properties ```bash curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -140,8 +289,7 @@ curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \ -d '{"properties": {"Status": {"select": {"name": "Done"}}}}' ``` -### Add Content to a Page - +### Append blocks to a page ```bash curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \ -H "Authorization: Bearer $NOTION_API_KEY" \ @@ -154,6 
+302,21 @@ curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \ }' ``` +### File uploads (3-step flow) +```bash +# 1. Create upload +curl -s -X POST "https://api.notion.com/v1/file_uploads" \ + -H "Authorization: Bearer $NOTION_API_KEY" \ + -H "Notion-Version: 2025-09-03" \ + -H "Content-Type: application/json" \ + -d '{"filename": "photo.png", "content_type": "image/png"}' + +# 2. PUT bytes to the upload_url returned above +curl -s -X PUT "{upload_url}" --data-binary @photo.png + +# 3. Reference {file_upload_id} in a page/block payload +``` + ## Property Types Common property formats for database items: @@ -169,19 +332,132 @@ Common property formats for database items: - **Email:** `{"email": "user@example.com"}` - **Relation:** `{"relation": [{"id": "page_id"}]}` -## Key Differences in API Version 2025-09-03 +## API Version 2025-09-03 — Databases vs Data Sources -- **Databases → Data Sources:** Use `/data_sources/` endpoints for queries and retrieval -- **Two IDs:** Each database has both a `database_id` and a `data_source_id` - - Use `database_id` when creating pages (`parent: {"database_id": "..."}`) - - Use `data_source_id` when querying (`POST /v1/data_sources/{id}/query`) -- **Search results:** Databases return as `"object": "data_source"` with their `data_source_id` +- **Databases became data sources.** Use `/data_sources/` endpoints for queries and retrieval. +- **Two IDs per database:** `database_id` and `data_source_id`. + - `database_id` when creating pages: `parent: {"database_id": "..."}` + - `data_source_id` when querying: `POST /v1/data_sources/{id}/query` +- Search returns databases as `"object": "data_source"` with the `data_source_id` field. + +## Notion Workers (advanced, requires `ntn`) + +Workers are TypeScript programs Notion hosts for you. One worker can expose any combination of: +- **Syncs** — pull data from external APIs into a Notion database on a schedule (default 30 min). 
+- **Tools** — appear as callable tools inside Notion's Custom Agents. +- **Webhooks** — receive HTTP events from external services (GitHub, Stripe, etc.) and act in Notion. + +**Plan / platform gating:** +- CLI works on all plans. **Deploying Workers requires Business or Enterprise.** +- `ntn` is macOS/Linux only as of May 2026. Windows users need WSL2 or to wait for native support. +- Free through August 11, 2026; metered on Notion credits after. + +### Minimal Worker + +```bash +ntn workers new my-worker # scaffold +cd my-worker +# Edit src/index.ts +ntn workers deploy --name my-worker +``` + +`src/index.ts`: +```typescript +import { Worker } from "@notionhq/workers"; + +const worker = new Worker(); +export default worker; + +worker.tool("greet", { + title: "Greet a User", + description: "Returns a friendly greeting", + inputSchema: { type: "object", properties: { name: { type: "string" } }, required: ["name"] }, + execute: async ({ name }) => `Hello, ${name}!`, +}); +``` + +### Webhook capability + +```typescript +worker.webhook("onGithubPush", { + title: "GitHub Push Handler", + execute: async (events, { notion }) => { + for (const event of events) { + // event.body, event.rawBody (for signature verification), event.headers + console.log("got delivery", event.deliveryId); + } + }, +}); +``` + +After deploy: `ntn workers webhooks list` shows the URL Notion generates. Treat that URL as a secret — anyone with it can POST events unless you add signature verification. + +### Worker lifecycle commands + +```bash +ntn workers deploy +ntn workers list +ntn workers exec -d '{"name": "world"}' +ntn workers sync trigger # run a sync now +ntn workers sync pause +ntn workers env set GITHUB_WEBHOOK_SECRET=... +ntn workers runs list # recent invocations +ntn workers runs logs +ntn workers webhooks list +``` + +When asked to build a Worker, scaffold with `ntn workers new`, write the code in `src/index.ts`, set any secrets with `ntn workers env set`, and deploy. 
Notion's docs at https://developers.notion.com/workers cover the full API surface. + +## Notion-Flavored Markdown (used by `/markdown` endpoints) + +Standard CommonMark plus XML-like tags for Notion-specific blocks. Use **tabs** for indentation. + +**Blocks beyond CommonMark:** +``` +<callout> + Ship the MVP by **Friday**. +</callout> + +<details>
+<summary>Toggle title</summary> + Children indented one tab +</details>
+ +<columns> + <column>Left side</column> + <column>Right side</column> +</columns> + + +``` + +**Inline:** +- Mentions: `<mention-user url="..."/>`, `<mention-page url="...">Title</mention-page>`, `<mention-date start="YYYY-MM-DD"/>` +- Underline: `<span underline="true">text</span>` +- Color: `<span color="blue">text</span>` or block-level `{color="blue"}` on the first line +- Math: inline `$x^2$`, block `$$ ... $$` +- Citations: `[^https://example.com]` + +**Colors:** `gray brown orange yellow green blue purple pink red`, plus `*_bg` variants for backgrounds. + +Headings 5/6 collapse to H4. Multiple `>` lines render as separate quote blocks — use `<br>` inside a single `>` for multi-line quotes. + +## Choosing the Right Path + +| Task | mac / Linux | Windows | +|---|---|---| +| Read/write pages, search, query databases | `ntn api ...` | curl | +| Read a page for an agent to summarize | `ntn api v1/pages/{id}/markdown` | curl `/markdown` endpoint | +| Upload a file | `ntn files create < file` | 3-step HTTP flow | +| One-off API exploration | `ntn api ...` | curl | +| Build a sync / webhook / agent tool hosted by Notion | `ntn workers ...` | WSL2 + `ntn workers ...` | ## Notes -- Page/database IDs are UUIDs (with or without dashes) -- Rate limit: ~3 requests/second average -- The API cannot set database view filters — that's UI-only -- Use `is_inline: true` when creating data sources to embed them in pages -- Add `-s` flag to curl to suppress progress bars (cleaner output for Hermes) -- Pipe output through `jq` for readable JSON: `... | jq '.results[0].properties'` +- Page/database IDs are UUIDs (with or without dashes — both accepted). +- Rate limit: ~3 requests/second average. The CLI doesn't bypass this. +- The API cannot set database **view** filters — that's UI-only. +- Use `"is_inline": true` when creating data sources to embed them in a page. +- Always pass `-s` to curl to suppress progress bars (cleaner agent output). +- Pipe JSON through `jq` when reading: `... | jq '.results[0].properties'`. +- Notion also ships an MCP server now (`Notion MCP`, ~91% more token-efficient on DB ops than the previous version) — wire it via Hermes' MCP support if you want streaming Notion access from inside a session, but the paths above are enough for most one-shot tasks.
From 2d7182f72c398496db60de5c18f8554d7ecc6d82 Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Fri, 15 May 2026 18:53:52 +0300 Subject: [PATCH 085/218] fix(delegate): move heartbeat thread start inside try block to prevent orphan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _heartbeat_thread.start() was called before the try/finally block that contains _heartbeat_stop.set(). If _register_subagent() or any code between .start() and try: raised an exception, the finally block would never run — leaving the heartbeat thread as an orphan that continues calling _touch_activity() on the parent agent, incorrectly resetting gateway timeout counters. Move _heartbeat_thread.start() to be the first statement inside the try block so the finally block always reaches _heartbeat_stop.set() regardless of how the child run completes or fails. Root cause: heartbeat start outside try/finally scope Impact: orphan heartbeat thread incorrectly resets parent gateway timeouts --- tools/delegate_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index f4da5127a18..2cdce9cae64 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1431,7 +1431,6 @@ def _run_single_child( pass _heartbeat_thread = threading.Thread(target=_heartbeat_loop, daemon=True) - _heartbeat_thread.start() # Register the live agent in the module-level registry so the TUI can # target it by subagent_id (kill, pause, status queries). 
Unregistered @@ -1462,6 +1461,7 @@ def _run_single_child( ) try: + _heartbeat_thread.start() if child_progress_cb: try: child_progress_cb("subagent.start", preview=goal) From 6068363311b861ad0bb411bfffe5958bf8b6d142 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 15:01:09 -0700 Subject: [PATCH 086/218] fix(delegate): guard heartbeat join against unstarted thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pairs with the prior commit (start() now inside the try block). If threading.Thread.start() itself raises (OS thread exhaustion under heavy delegation fanout), the finally would call .join() on a never-started thread, which raises RuntimeError("cannot join thread before it is started") — trading one rare bug for another. Thread.ident is None until start() succeeds, so gate the join on it. --- tools/delegate_tool.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2cdce9cae64..f3a037c4341 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1836,9 +1836,13 @@ def _run_single_child( finally: # Stop the heartbeat thread so it doesn't keep touching parent activity - # after the child has finished (or failed). + # after the child has finished (or failed). Guard the join: .start() + # now lives inside the try block, so if it raised (OS thread + # exhaustion) the thread was never started and Thread.join() would + # raise RuntimeError. ident is None until start() succeeds. _heartbeat_stop.set() - _heartbeat_thread.join(timeout=5) + if _heartbeat_thread.ident is not None: + _heartbeat_thread.join(timeout=5) # Drop the TUI-facing registry entry. Safe to call even if the # child was never registered (e.g. ID missing on test doubles). 
From 7fee1f61eb52d1706af04c9606ee1a2e7ef3afc3 Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Fri, 15 May 2026 18:28:45 +0300 Subject: [PATCH 087/218] fix(memory): eliminate TOCTOU race in Windows file lock creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows (msvcrt path), _file_lock() first checked if the lock file existed and wrote it with write_text(), then opened it with open('r+'). Between these two calls, another process could delete the file causing open('r+') to raise FileNotFoundError — uncaught, leaving memory writes to proceed without holding the lock, risking data corruption. Replace the three-line sequence with a single open('a+', ...) call which atomically creates the file if missing or opens it if it exists, closing the TOCTOU window entirely. The existing fd.seek(0) before msvcrt.locking() is preserved and sufficient for correct lock byte positioning. Root cause: TOCTOU between lock_path.write_text() and open('r+') Impact: concurrent memory writes on Windows could corrupt MEMORY.md --- tools/memory_tool.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 236760a464a..42737f66c4f 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -156,10 +156,7 @@ class MemoryStore: yield return - if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0): - lock_path.write_text(" ", encoding="utf-8") - - fd = open(lock_path, "r+" if msvcrt else "a+", encoding="utf-8") + fd = open(lock_path, "a+", encoding="utf-8") try: if fcntl: fcntl.flock(fd, fcntl.LOCK_EX) From 4aec25bc4411edb4563292cadbd02c365c846286 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 14:58:28 -0700 Subject: [PATCH 088/218] fix(windows): stop spamming cwd-missing + tirith-spawn warnings on every terminal call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Two log-spam fixes surfaced by a Windows user (Git Bash + Python 3.11.9): 1. LocalEnvironment cwd warn spam ============================ Git Bash's `pwd -P` emits paths like `/c/Users/x`. The base-class `_extract_cwd_from_output` was assigning this verbatim to `self.cwd` without validation, then `_resolve_safe_cwd`'s `os.path.isdir(/c/...)` returned False on Windows, triggering: LocalEnvironment cwd '/c/Users/NVIDIA' is missing on disk; falling back to '/' so terminal commands keep working. ...on every terminal call. The pre-existing Windows-path translation inside `_run_bash` ran AFTER the safe-cwd check, so it could never prevent the warning. Fix: - New `_msys_to_windows_path` helper (idempotent, no-op off Windows). - `_resolve_safe_cwd` normalizes before `isdir`, so a valid MSYS path is recognized as the real directory it points at. - `LocalEnvironment._update_cwd` and a new override of `_extract_cwd_from_output` translate + validate before mutating `self.cwd`. Stale / non-existent marker paths roll back to the previous cwd instead of clobbering it. - The fallback warning still fires when the directory really is gone (deletion-recovery scenario from #17558 still covered). 2. tirith spawn-failed warn spam ============================= When tirith isn't installed (background install in flight, or marked failed for the day) and the configured path stays as the bare string `tirith`, every `subprocess.run([tirith_path, ...])` raises OSError and logged: tirith spawn failed: [WinError 2] The system cannot find the file specified ...on every command. fail_open=True means behaviour is correct, but the log noise is severe. Fix: - `_warn_once(key, ...)` thread-safe dedupe helper. - Three hot-path warnings (`tirith path resolved to None`, `tirith spawn failed: ...`, `tirith timed out after Ns`) now log once per (exception class, errno) / timeout-value / path-none key. 
- Dedupe set is cleared on `_clear_install_failed` so a successful install lets a subsequent failure surface again. Tests ===== - `tests/tools/test_local_env_windows_msys.py`: 12 tests covering the MSYS→Windows translator, the resolve fast-path, update_cwd validation, and extract_cwd_from_output rollback. - `tests/tools/test_tirith_security.py`: 4 new dedupe tests (15 spawn failures → 1 log line; distinct exc types → 2 lines; timeout dedupe; path-None dedupe). Targeted runs: test_local_env_windows_msys.py 12 passed test_local_env_cwd_recovery.py 7 passed (pre-existing, no regressions) test_tirith_security.py 67 passed (63 pre-existing + 4 new) test_base_environment + local_* 37 passed (no regressions) test_local_env_blocklist + neighbours 114 passed Reported via Hermes log capture: 19× cwd warnings + 15× tirith warnings in a single short session. --- tests/tools/test_local_env_windows_msys.py | 200 +++++++++++++++++++++ tests/tools/test_tirith_security.py | 117 ++++++++++++ tools/environments/local.py | 85 +++++++-- tools/tirith_security.py | 53 +++++- 4 files changed, 441 insertions(+), 14 deletions(-) create mode 100644 tests/tools/test_local_env_windows_msys.py diff --git a/tests/tools/test_local_env_windows_msys.py b/tests/tools/test_local_env_windows_msys.py new file mode 100644 index 00000000000..6987c965af6 --- /dev/null +++ b/tests/tools/test_local_env_windows_msys.py @@ -0,0 +1,200 @@ +"""Tests for the Windows / Git Bash MSYS-path normalization in +``LocalEnvironment``. + +Background +---------- +On Windows, ``pwd -P`` inside Git Bash emits paths like +``/c/Users/NVIDIA``. ``subprocess.Popen(..., cwd=...)`` only accepts +native Windows paths (``C:\\Users\\NVIDIA``), and the validation done +by ``_resolve_safe_cwd`` was also checking the MSYS form against +``os.path.isdir``, which returns ``False`` on Windows. 
The combined +effect was a warning logged on every single terminal call: + + LocalEnvironment cwd '/c/Users/NVIDIA' is missing on disk; + falling back to '/' so terminal commands keep working. + +These tests fake the Windows env on Linux CI by patching ``_IS_WINDOWS`` +and ``os.path.isdir`` so the MSYS path tests as "missing" exactly like +on the real OS. +""" + +import os +from unittest.mock import patch + +import pytest + +from tools.environments import local as local_mod +from tools.environments.local import ( + LocalEnvironment, + _msys_to_windows_path, + _resolve_safe_cwd, +) + + +# --------------------------------------------------------------------------- +# _msys_to_windows_path — pure-function unit tests +# --------------------------------------------------------------------------- + +class TestMsysToWindowsPath: + def test_noop_on_non_windows(self, monkeypatch): + monkeypatch.setattr(local_mod, "_IS_WINDOWS", False) + # On a non-Windows host the function must never rewrite the path + # — POSIX-style paths are real paths there. + assert _msys_to_windows_path("/c/Users/NVIDIA") == "/c/Users/NVIDIA" + assert _msys_to_windows_path("/home/teknium") == "/home/teknium" + + def test_translates_drive_path(self, monkeypatch): + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + assert _msys_to_windows_path("/c/Users/NVIDIA") == r"C:\Users\NVIDIA" + assert _msys_to_windows_path("/d/Projects/foo bar") == r"D:\Projects\foo bar" + + def test_translates_bare_drive_root(self, monkeypatch): + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + # Bare "/c" alone should resolve to the drive root. + assert _msys_to_windows_path("/c") == "C:\\" + # Trailing slash on the drive letter is also a root. 
+ assert _msys_to_windows_path("/c/") == "C:\\" + + def test_idempotent_on_already_windows_path(self, monkeypatch): + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + assert _msys_to_windows_path(r"C:\Users\NVIDIA") == r"C:\Users\NVIDIA" + + def test_does_not_translate_multi_char_first_segment(self, monkeypatch): + """``/tmp/foo`` and ``/home/x`` must NOT be misread as drive paths + just because they start with ``/`` and a single letter — the regex + only matches when the first segment is exactly one character.""" + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + assert _msys_to_windows_path("/tmp/foo") == "/tmp/foo" + assert _msys_to_windows_path("/home/x") == "/home/x" + + def test_empty_string(self, monkeypatch): + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + assert _msys_to_windows_path("") == "" + + +# --------------------------------------------------------------------------- +# _resolve_safe_cwd — Windows fast path +# --------------------------------------------------------------------------- + +class TestResolveSafeCwdWindows: + def test_msys_path_resolves_to_native_when_native_exists( + self, monkeypatch, tmp_path, + ): + """The whole point of this fix: a Git Bash ``/c/Users/x`` value + should resolve to its native equivalent if that native dir exists, + WITHOUT falling back to the temp dir.""" + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + + # tmp_path is a real native dir on the test host. Build a fake + # MSYS form pointing at it and prove the resolver finds it. + native = str(tmp_path) + # Construct a synthetic MSYS form for whatever tmp_path is. + # On Linux CI tmp_path is /tmp/... ; the resolver shouldn't even + # try to translate that (regex won't match), so emulate the + # mapping by pointing the translator at the real native dir. 
+ with patch.object( + local_mod, "_msys_to_windows_path", return_value=native + ): + assert _resolve_safe_cwd("/c/whatever") == native + + +# --------------------------------------------------------------------------- +# End-to-end: _update_cwd via marker file (Windows simulation) +# --------------------------------------------------------------------------- + +class TestUpdateCwdWindowsMsys: + def test_marker_file_msys_path_stored_in_native_form( + self, monkeypatch, tmp_path, + ): + """When Git Bash writes ``/c/Users/x`` to the cwd marker file on + Windows, ``_update_cwd`` must translate to native form before + validating and storing — otherwise ``os.path.isdir`` rejects a + perfectly real directory.""" + original = tmp_path / "starting" + original.mkdir() + + # Fake Windows for the test + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + + with patch.object( + LocalEnvironment, "init_session", autospec=True, return_value=None + ): + env = LocalEnvironment(cwd=str(original), timeout=10) + + # Pretend Git Bash wrote an MSYS path that maps to tmp_path/"next" + new_dir = tmp_path / "next" + new_dir.mkdir() + + with open(env._cwd_file, "w") as f: + f.write("/c/whatever/from/bash") + + # Translate the synthetic MSYS string to the real native dir. 
+ def fake_translate(p): + if p == "/c/whatever/from/bash": + return str(new_dir) + return p + + with patch.object(local_mod, "_msys_to_windows_path", side_effect=fake_translate): + env._update_cwd({"output": "", "returncode": 0}) + + assert env.cwd == str(new_dir) + + +# --------------------------------------------------------------------------- +# End-to-end: _extract_cwd_from_output rollback when marker is invalid +# --------------------------------------------------------------------------- + +class TestExtractCwdFromOutputWindowsMsys: + def test_stale_msys_marker_does_not_clobber_cwd(self, monkeypatch, tmp_path): + """When the cwd marker in stdout points at a non-existent path, + ``LocalEnvironment._extract_cwd_from_output`` must roll back to + the previous cwd instead of propagating a bad value.""" + original = tmp_path / "starting" + original.mkdir() + + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + + with patch.object( + LocalEnvironment, "init_session", autospec=True, return_value=None + ): + env = LocalEnvironment(cwd=str(original), timeout=10) + + marker = env._cwd_marker + result = { + "output": f"some command output\n{marker}/c/no/such/path{marker}\n", + "returncode": 0, + } + + # Translation produces a path that doesn't exist on disk → rollback. 
+ with patch.object( + local_mod, + "_msys_to_windows_path", + return_value=str(tmp_path / "definitely-does-not-exist"), + ): + env._extract_cwd_from_output(result) + + assert env.cwd == str(original) + + def test_valid_msys_marker_normalized_to_native(self, monkeypatch, tmp_path): + original = tmp_path / "starting" + original.mkdir() + new_dir = tmp_path / "next" + new_dir.mkdir() + + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + + with patch.object( + LocalEnvironment, "init_session", autospec=True, return_value=None + ): + env = LocalEnvironment(cwd=str(original), timeout=10) + + marker = env._cwd_marker + result = { + "output": f"x\n{marker}/c/whatever{marker}\n", + "returncode": 0, + } + + with patch.object(local_mod, "_msys_to_windows_path", return_value=str(new_dir)): + env._extract_cwd_from_output(result) + + assert env.cwd == str(new_dir) diff --git a/tests/tools/test_tirith_security.py b/tests/tools/test_tirith_security.py index 20d20ccfa11..ecaf4f4e639 100644 --- a/tests/tools/test_tirith_security.py +++ b/tests/tools/test_tirith_security.py @@ -1007,3 +1007,120 @@ class TestHermesHomeIsolation: expected = os.path.join(os.path.expanduser("~"), ".hermes") result = _get_hermes_home() assert result == expected + + +# --------------------------------------------------------------------------- +# Warn-once dedupe (issue: tirith spawn failed spamming on Windows) +# --------------------------------------------------------------------------- + +class TestSpawnWarningDedup: + """When tirith isn't installed yet (background install in flight, or + install marked failed), every terminal command spammed an identical + ``tirith spawn failed: [WinError 2]`` warning to ``errors.log``. The + dedupe set in ``_warn_once`` collapses repeats by ``(exc class, errno)`` + while still surfacing the first occurrence so users see the failure. 
+ """ + + @patch("tools.tirith_security.subprocess.run") + @patch("tools.tirith_security._load_security_config") + def test_repeated_spawn_failure_logs_once(self, mock_cfg, mock_run, caplog): + mock_cfg.return_value = { + "tirith_enabled": True, "tirith_path": "tirith", + "tirith_timeout": 5, "tirith_fail_open": True, + } + mock_run.side_effect = FileNotFoundError("[WinError 2]") + # Fresh dedupe state — clear any keys left by other tests. + _tirith_mod._reset_spawn_warning_state() + + with caplog.at_level("WARNING", logger="tools.tirith_security"): + for _ in range(15): + result = check_command_security("echo hi") + # Behavior must remain the same on every call — + # fail-open allow, with the exception captured in summary. + assert result["action"] == "allow" + assert "unavailable" in result["summary"] + + spawn_warnings = [ + rec for rec in caplog.records + if "tirith spawn failed" in rec.message + ] + assert len(spawn_warnings) == 1, ( + f"expected exactly 1 spawn-failed warning across 15 commands, " + f"got {len(spawn_warnings)}: {[r.message for r in spawn_warnings]}" + ) + + @patch("tools.tirith_security.subprocess.run") + @patch("tools.tirith_security._load_security_config") + def test_distinct_exception_types_each_log_once(self, mock_cfg, mock_run, caplog): + """``FileNotFoundError`` and ``PermissionError`` are distinct + failure modes and each deserves its own first-occurrence log + line; the dedupe key includes the exception class.""" + mock_cfg.return_value = { + "tirith_enabled": True, "tirith_path": "tirith", + "tirith_timeout": 5, "tirith_fail_open": True, + } + _tirith_mod._reset_spawn_warning_state() + + with caplog.at_level("WARNING", logger="tools.tirith_security"): + mock_run.side_effect = FileNotFoundError("[WinError 2]") + for _ in range(3): + check_command_security("a") + mock_run.side_effect = PermissionError("denied") + for _ in range(3): + check_command_security("b") + + spawn_warnings = [ + rec for rec in caplog.records + if "tirith spawn 
failed" in rec.message + ] + assert len(spawn_warnings) == 2, ( + f"expected 2 distinct first-occurrence warnings, " + f"got {len(spawn_warnings)}" + ) + + @patch("tools.tirith_security.subprocess.run") + @patch("tools.tirith_security._load_security_config") + def test_repeated_timeout_logs_once(self, mock_cfg, mock_run, caplog): + mock_cfg.return_value = { + "tirith_enabled": True, "tirith_path": "tirith", + "tirith_timeout": 5, "tirith_fail_open": True, + } + mock_run.side_effect = subprocess.TimeoutExpired(cmd="tirith", timeout=5) + _tirith_mod._reset_spawn_warning_state() + + with caplog.at_level("WARNING", logger="tools.tirith_security"): + for _ in range(10): + result = check_command_security("slow") + assert result["action"] == "allow" + + timeout_warnings = [ + rec for rec in caplog.records + if "tirith timed out" in rec.message + ] + assert len(timeout_warnings) == 1 + + @patch("tools.tirith_security._load_security_config") + def test_path_none_logs_once(self, mock_cfg, caplog): + """``_resolve_tirith_path`` returning ``None`` (explicit path set + but resolver returned None — unusual) should not spam the log + either.""" + mock_cfg.return_value = { + "tirith_enabled": True, "tirith_path": "tirith", + "tirith_timeout": 5, "tirith_fail_open": True, + } + _tirith_mod._reset_spawn_warning_state() + + with patch( + "tools.tirith_security._resolve_tirith_path", return_value=None + ): + with caplog.at_level("WARNING", logger="tools.tirith_security"): + for _ in range(10): + result = check_command_security("echo") + assert result["action"] == "allow" + assert "tirith path unavailable" in result["summary"] + + none_warnings = [ + rec for rec in caplog.records + if "tirith path resolved to None" in rec.message + ] + assert len(none_warnings) == 1 diff --git a/tools/environments/local.py b/tools/environments/local.py index 7aa75a62d0c..3b9d65449fa 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -18,18 +18,44 @@ _IS_WINDOWS = 
platform.system() == "Windows" logger = logging.getLogger(__name__) +def _msys_to_windows_path(cwd: str) -> str: + """Translate a Git Bash / MSYS-style POSIX path (``/c/Users/x``) to the + native Windows form (``C:\\Users\\x``) so ``os.path.isdir`` and + ``subprocess.Popen(..., cwd=...)`` can find it. + + No-ops on non-Windows hosts or for paths that aren't in MSYS form. + Returns the input unchanged when no translation applies. This is + idempotent — calling it on an already-Windows path returns it as-is. + """ + if not _IS_WINDOWS or not cwd: + return cwd + # Match leading "/<letter>/" or exactly "/<letter>" (bare drive root). + m = re.match(r'^/([a-zA-Z])(/.*)?$', cwd) + if not m: + return cwd + drive = m.group(1).upper() + tail = (m.group(2) or "").replace('/', '\\') + return f"{drive}:{tail or chr(92)}" # chr(92) = backslash, avoid raw-string escape + + def _resolve_safe_cwd(cwd: str) -> str: """Return ``cwd`` if it exists as a directory, else the nearest existing ancestor. Falls back to ``tempfile.gettempdir()`` only if walking up the path can't find any existing directory (effectively never on a healthy filesystem, but cheap belt-and-braces). + On Windows, also normalizes Git Bash / MSYS-style POSIX paths + (``/c/Users/x``) to native Windows form before the isdir check so a + perfectly valid ``pwd -P`` result from bash doesn't get rejected as + "missing" (see ``_msys_to_windows_path``). + Used by ``_run_bash`` to recover when the configured cwd is gone — most commonly because a previous tool call deleted its own working directory (issue #17558). Without this guard, ``subprocess.Popen(..., cwd=...)`` raises ``FileNotFoundError`` before bash starts, wedging every subsequent terminal call until the gateway restarts. """ + cwd = _msys_to_windows_path(cwd) if _IS_WINDOWS else cwd if cwd and os.path.isdir(cwd): return cwd parent = os.path.dirname(cwd) if cwd else "" @@ -455,21 +481,27 @@ class LocalEnvironment(BaseEnvironment): # (issue #17558).
Popen would otherwise raise FileNotFoundError on # the cwd before bash starts, wedging every subsequent call until the # gateway restarts. + # + # On Windows, ``_resolve_safe_cwd`` also normalises Git Bash-style + # POSIX paths (``/c/Users/...``) to native form so a perfectly valid + # ``pwd -P`` result from bash isn't mistakenly treated as "missing" + # and spammed as a warning on every command. safe_cwd = _resolve_safe_cwd(self.cwd) if safe_cwd != self.cwd: - logger.warning( - "LocalEnvironment cwd %r is missing on disk; " - "falling back to %r so terminal commands keep working.", - self.cwd, - safe_cwd, - ) + # MSYS → Windows translation alone shouldn't surface as a warning + # (it's a benign normalization, not a recovery). Only warn when + # the directory really doesn't exist on disk. + normalized = _msys_to_windows_path(self.cwd) if _IS_WINDOWS else self.cwd + if safe_cwd != normalized: + logger.warning( + "LocalEnvironment cwd %r is missing on disk; " + "falling back to %r so terminal commands keep working.", + self.cwd, + safe_cwd, + ) self.cwd = safe_cwd - # On Windows, self.cwd may be a Git Bash-style path (/c/Users/...) - # from pwd output. subprocess.Popen needs a native Windows path. _popen_cwd = self.cwd - if _IS_WINDOWS and _popen_cwd and re.match(r'^/[a-zA-Z]/', _popen_cwd): - _popen_cwd = _popen_cwd[1].upper() + ':' + _popen_cwd[2:].replace('/', '\\') proc = subprocess.Popen( args, @@ -571,10 +603,19 @@ class LocalEnvironment(BaseEnvironment): ``pwd -P`` on a deleted cwd can leave a stale value in the marker file, and propagating it would re-wedge the next ``Popen``. The ``_run_bash`` recovery path will resolve a safe fallback if needed. + + On Windows, the value written by Git Bash's ``pwd -P`` is in + MSYS form (``/c/Users/x``). 
Translate it to native Windows form + before validating with ``os.path.isdir`` and before storing on + ``self.cwd``; otherwise the isdir check rejects every valid + result and ``_run_bash`` later prints a misleading "cwd is + missing" warning on every command. """ try: with open(self._cwd_file, encoding="utf-8") as f: cwd_path = f.read().strip() + if _IS_WINDOWS: + cwd_path = _msys_to_windows_path(cwd_path) if cwd_path and os.path.isdir(cwd_path): self.cwd = cwd_path except (OSError, FileNotFoundError): @@ -583,6 +624,30 @@ class LocalEnvironment(BaseEnvironment): # Still strip the marker from output so it's not visible self._extract_cwd_from_output(result) + def _extract_cwd_from_output(self, result: dict): + """Same semantics as the base class, but on Windows the value + emitted by ``pwd -P`` inside Git Bash is in MSYS form + (``/c/Users/x``). Normalize to native Windows form and validate + the directory exists before assigning to ``self.cwd`` — otherwise + ``_run_bash``'s safe-cwd recovery would warn on every subsequent + command. + + Always defers to the base class for stripping the marker text from + ``result["output"]`` so output formatting is identical. + """ + # Snapshot pre-existing cwd, defer to base for parsing + marker + # stripping, then validate / normalize whatever it assigned. + prev_cwd = self.cwd + super()._extract_cwd_from_output(result) + if self.cwd != prev_cwd: + normalized = _msys_to_windows_path(self.cwd) if _IS_WINDOWS else self.cwd + if normalized and os.path.isdir(normalized): + self.cwd = normalized + else: + # Stale / non-existent path — keep previous cwd; _run_bash + # will resolve a safe fallback on the next call if needed. 
+ self.cwd = prev_cwd + def cleanup(self): """Clean up temp files.""" for f in (self._snapshot_path, self._cwd_file): diff --git a/tools/tirith_security.py b/tools/tirith_security.py index 350265d33a1..1c79892f424 100644 --- a/tools/tirith_security.py +++ b/tools/tirith_security.py @@ -101,6 +101,34 @@ _install_failure_reason: str = "" # reason tag when _resolved_path is _INSTALL_ _install_lock = threading.Lock() _install_thread: threading.Thread | None = None +# Warning de-duplication. The spawn/path warnings live in the hot path — +# without this dedupe set, a Windows install where ``tirith`` isn't on PATH +# (e.g. background install thread still running, or install marked failed) +# spams ``tirith spawn failed: [WinError 2]...`` once per terminal command, +# easily filling errors.log with hundreds of identical lines. +_warned_messages: set[str] = set() +_warned_lock = threading.Lock() + + +def _warn_once(key: str, message: str, *args) -> None: + """``logger.warning`` but at-most-once per ``key`` for the process + lifetime. Used to avoid drowning the log when a fail-open tirith + misconfiguration fires on every command.""" + with _warned_lock: + if key in _warned_messages: + return + _warned_messages.add(key) + logger.warning(message, *args) + + +def _reset_spawn_warning_state() -> None: + """Clear the warn-once dedupe set. Called when tirith is freshly + (re)installed so a subsequent failure surfaces again — e.g. user + deletes the binary mid-session. + """ + with _warned_lock: + _warned_messages.clear() + # Disk-persistent failure marker — avoids retry across process restarts _MARKER_TTL = 86400 # 24 hours @@ -168,6 +196,10 @@ def _mark_install_failed(reason: str = ""): def _clear_install_failed(): """Remove the failure marker after successful install.""" + # Reset the warn-once dedupe set so a subsequent failure (e.g. user + # deletes the binary) surfaces in the log again instead of being + # silently suppressed by a stale dedupe key from before the fix. 
+ _reset_spawn_warning_state() try: os.unlink(_failure_marker_path()) except OSError: @@ -632,7 +664,10 @@ def check_command_security(command: str) -> dict: fail_open = cfg["tirith_fail_open"] if tirith_path is None: - logger.warning("tirith path resolved to None; scanning disabled") + _warn_once( + "tirith_path_none", + "tirith path resolved to None; scanning disabled", + ) if fail_open: return {"action": "allow", "findings": [], "summary": "tirith path unavailable"} return {"action": "block", "findings": [], "summary": "tirith path unavailable (fail-closed)"} @@ -646,13 +681,23 @@ def check_command_security(command: str) -> dict: timeout=timeout, ) except OSError as exc: - # Covers FileNotFoundError, PermissionError, exec format error - logger.warning("tirith spawn failed: %s", exc) + # Covers FileNotFoundError, PermissionError, exec format error. + # Dedupe by ``(errno, exc class)`` so a transient failure mode + # surfaces once but doesn't drown the log on every command — + # commonly seen on Windows when the configured path "tirith" + # isn't on PATH yet (background install still running, or + # install marked failed for the day). 
+ spawn_key = f"tirith_spawn_failed:{type(exc).__name__}:{getattr(exc, 'errno', '')}" + _warn_once(spawn_key, "tirith spawn failed: %s", exc) if fail_open: return {"action": "allow", "findings": [], "summary": f"tirith unavailable: {exc}"} return {"action": "block", "findings": [], "summary": f"tirith spawn failed (fail-closed): {exc}"} except subprocess.TimeoutExpired: - logger.warning("tirith timed out after %ds", timeout) + _warn_once( + f"tirith_timeout:{timeout}", + "tirith timed out after %ds", + timeout, + ) if fail_open: return {"action": "allow", "findings": [], "summary": f"tirith timed out ({timeout}s)"} return {"action": "block", "findings": [], "summary": "tirith timed out (fail-closed)"} From 31ba2b0cbcac310f7aa2db3c8885e37f2e2e37fb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 16:35:12 -0700 Subject: [PATCH 089/218] fix(xai-oauth): recover from prelude SSE errors, gate reasoning replay, surface entitlement 403s (#26644) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout failures: - _run_codex_stream: when openai SDK raises RuntimeError("Expected to have received `response.created` before ``"), retry once then fall back to responses.create(stream=True) — same path used for missing-response.completed postlude. Fallback surfaces the real provider error with body+status_code intact. Also fixes #8133 (response.in_progress prelude on custom relays) and #14634 (codex.rate_limits prelude on codex-lb). - _summarize_api_error: when error body matches xAI's entitlement shape, append a one-line hint pointing to https://grok.com and /model. Once-only, applies to both auxiliary warnings and main-loop error surfacing. - _chat_messages_to_responses_input: new is_xai_responses kwarg drops replayed codex_reasoning_items (encrypted_content) before they reach xAI. 
Also drops reasoning.encrypted_content from the xAI include array. Native Codex behavior unchanged. Grok still reasons natively each turn; coherence rides on visible message text alone. Closes #8133, #14634. --- agent/codex_responses_adapter.py | 27 +- agent/transports/codex.py | 19 +- run_agent.py | 81 +++- .../agent/transports/test_codex_transport.py | 21 +- .../test_codex_xai_oauth_recovery.py | 351 ++++++++++++++++++ 5 files changed, 481 insertions(+), 18 deletions(-) create mode 100644 tests/run_agent/test_codex_xai_oauth_recovery.py diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index 00345f054e8..6fe9dc5bc64 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed return default -def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" +def _chat_messages_to_responses_input( + messages: List[Dict[str, Any]], + *, + is_xai_responses: bool = False, +) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items. + + ``is_xai_responses=True`` strips ``encrypted_content`` from replayed + reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface + rejects encrypted reasoning blobs minted by prior turns: the request + streams an ``error`` SSE frame before ``response.created`` and the + OpenAI SDK collapses it into a generic stream-ordering error. Native + Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content + — keep the default off. + """ items: List[Dict[str, Any]] = [] seen_item_ids: set = set() @@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di if role == "assistant": # Replay encrypted reasoning items from previous turns # so the API can maintain coherent reasoning chains. 
+ # + # xAI OAuth (SuperGrok/Premium) rejects replayed + # ``encrypted_content`` reasoning items minted by prior + # turns — see _chat_messages_to_responses_input docstring. + # When ``is_xai_responses`` is set we drop the replay + # entirely; Grok still reasons on each turn server-side, + # we just don't try to thread the prior turn's encrypted + # blob back in. codex_reasoning = msg.get("codex_reasoning_items") has_codex_reasoning = False - if isinstance(codex_reasoning, list): + if isinstance(codex_reasoning, list) and not is_xai_responses: for ri in codex_reasoning: if isinstance(ri, dict) and ri.get("encrypted_content"): item_id = ri.get("id") diff --git a/agent/transports/codex.py b/agent/transports/codex.py index cfd9f128778..3661ea17a3e 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport): def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: """Convert OpenAI chat messages to Responses API input items.""" from agent.codex_responses_adapter import _chat_messages_to_responses_input - return _chat_messages_to_responses_input(messages) + return _chat_messages_to_responses_input( + messages, + is_xai_responses=bool(kwargs.get("is_xai_responses")), + ) def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: """Convert OpenAI tool schemas to Responses API function definitions.""" @@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport): kwargs = { "model": model, "instructions": instructions, - "input": _chat_messages_to_responses_input(payload_messages), + "input": _chat_messages_to_responses_input( + payload_messages, + is_xai_responses=is_xai_responses, + ), "tools": response_tools, "store": False, } @@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport): if reasoning_enabled and is_xai_responses: from agent.model_metadata import grok_supports_reasoning_effort - kwargs["include"] = ["reasoning.encrypted_content"] + # NOTE: 
Hermes does NOT ask xAI to return ``reasoning.encrypted_content`` + # any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects + # replayed encrypted reasoning items on turn 2+ — see + # _chat_messages_to_responses_input docstring. Requesting the field + # back would just have us cache something we then must strip. Grok + # still reasons natively each turn; coherence across turns rides on + # the visible message text alone. + kwargs["include"] = [] # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3 # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though # those models reason natively. Only send the effort dial when diff --git a/run_agent.py b/run_agent.py index 7e42beb3eba..2b20d48ede2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4966,6 +4966,45 @@ class AIAgent: trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) + @staticmethod + def _decorate_xai_entitlement_error(detail: str) -> str: + """Append a friendly hint when xAI's OAuth surface returns an + entitlement-shaped error. + + xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a + SuperGrok / X Premium subscription with HTTP 403 carrying a body like:: + + {"code": "The caller does not have permission to execute the + specified operation", "error": "You have either run out of + available resources or do not have an active Grok subscription. + Manage subscriptions at https://grok.com/..."} + + The raw text is useful but the action the user needs to take (subscribe + on grok.com, or switch providers with ``/model``) isn't obvious from + the wire format. Detect the entitlement shape and append a hint. + + Matched once per detail string — won't double-decorate if the upstream + already concatenated the same text. 
+ """ + if not detail: + return detail + lower = detail.lower() + is_entitlement = ( + "do not have an active grok subscription" in lower + or ("out of available resources" in lower and "grok" in lower) + or ("does not have permission" in lower and "grok" in lower) + ) + if not is_entitlement: + return detail + hint = ( + " — xAI OAuth account lacks SuperGrok / X Premium entitlement for " + "this model. Subscribe at https://grok.com or run `/model` to " + "switch providers." + ) + if hint.strip() in detail: + return detail + return f"{detail}{hint}" + @staticmethod def _summarize_api_error(error: Exception) -> str: """Extract a human-readable one-liner from an API error. @@ -4999,12 +5038,12 @@ class AIAgent: if msg: status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{msg[:300]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}") # Fallback: truncate the raw string but give more room than 200 chars status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" - return f"{prefix}{raw[:500]}" + return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}") def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]: if not key: @@ -7056,18 +7095,48 @@ class AIAgent: except RuntimeError as exc: err_text = str(exc) missing_completed = "response.completed" in err_text - if missing_completed and attempt < max_stream_retries: + # The OpenAI SDK's Responses streaming state machine raises + # ``RuntimeError("Expected to have received `response.created` + # before ``")`` when the first SSE event from the + # server is anything other than ``response.created`` — and it + # discards the event's payload before we can read it. 
Three + # real-world backends emit a different first frame: + # + # * xAI on grok-4.x OAuth — sends ``error`` (issues + # reported around the May 2026 SuperGrok rollout when + # multi-turn conversations replay encrypted reasoning + # content the OAuth tier rejects) + # * codex-lb relays — send ``codex.rate_limits`` (#14634) + # * custom Responses relays — send ``response.in_progress`` + # (#8133) + # + # In all three cases the underlying byte stream is still + # readable: a non-stream ``responses.create(stream=True)`` + # fallback succeeds and surfaces the real provider error as + # a normal exception with body+status_code attached, which + # ``_summarize_api_error`` can then translate into a useful + # user-facing line. Treat ``response.created`` prelude + # errors the same way we already treat ``response.completed`` + # postlude errors. + prelude_error = ( + "Expected to have received `response.created`" in err_text + or "Expected to have received \"response.created\"" in err_text + ) + if (missing_completed or prelude_error) and attempt < max_stream_retries: logger.debug( - "Responses stream closed before completion (attempt %s/%s); retrying. %s", + "Responses stream %s (attempt %s/%s); retrying. %s", + "prelude rejected" if prelude_error else "closed before completion", attempt + 1, max_stream_retries + 1, self._client_log_context(), ) continue - if missing_completed: + if missing_completed or prelude_error: logger.debug( - "Responses stream did not emit response.completed; falling back to create(stream=True). %s", + "Responses stream %s; falling back to create(stream=True). 
%s err=%s", + "rejected before response.created" if prelude_error else "did not emit response.completed", self._client_log_context(), + err_text, ) return self._run_codex_create_stream_fallback(api_kwargs, client=active_client) raise diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index ad70167b09f..82251823790 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -194,9 +194,16 @@ class TestCodexBuildKwargs: is_xai_responses=True, reasoning_config={"effort": "high"}, ) - # xAI Responses must receive both encrypted reasoning content and the effort + # xAI Responses receives reasoning.effort on the allowlisted models. assert kw.get("reasoning") == {"effort": "high"} - assert "reasoning.encrypted_content" in kw.get("include", []) + # As of May 2026 we deliberately do NOT request + # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok + # surface rejects replayed encrypted reasoning items on turn 2+ + # (the multi-turn "Expected to have received response.created + # before error" failure). Grok still reasons natively each turn; + # we just don't try to thread the prior turn's encrypted blob back + # in. See tests/run_agent/test_codex_xai_oauth_recovery.py. + assert "reasoning.encrypted_content" not in kw.get("include", []) def test_xai_reasoning_disabled_no_reasoning_key(self, transport): messages = [{"role": "user", "content": "Hi"}] @@ -222,8 +229,9 @@ class TestCodexBuildKwargs: # api.x.ai 400s with "Model X does not support parameter reasoningEffort" # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*. # Those models reason natively but don't expose the dial. The transport - # must omit the `reasoning` key for them while keeping the encrypted - # reasoning content include so we can capture native reasoning tokens. + # must omit the `reasoning` key for them. 
As of May 2026 we also no + # longer request ``reasoning.encrypted_content`` back from xAI on ANY + # model — see test_xai_reasoning_effort_passed for the rationale. def test_xai_grok_4_omits_reasoning_effort(self, transport): """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400.""" @@ -237,8 +245,9 @@ class TestCodexBuildKwargs: assert "reasoning" not in kw, ( f"{model} must not receive a reasoning key (xAI rejects it)" ) - # Still capture native reasoning tokens - assert "reasoning.encrypted_content" in kw.get("include", []) + # We no longer ask xAI for encrypted_content back (see comment + # above) — verify the include list is empty. + assert "reasoning.encrypted_content" not in kw.get("include", []) def test_xai_grok_4_fast_omits_reasoning_effort(self, transport): """grok-4-fast and grok-4-1-fast variants reject reasoning.effort.""" diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py new file mode 100644 index 00000000000..0f3603d2ca7 --- /dev/null +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -0,0 +1,351 @@ +"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs. + +Three distinct failure modes the user community hit during rollout: + +1. ``RuntimeError("Expected to have received `response.created` before + `error`")`` on multi-turn xAI OAuth conversations. The OpenAI SDK's + Responses streaming state machine collapses an upstream ``error`` SSE + frame into a generic stream-ordering error. ``_run_codex_stream`` + now treats this the same way it already treats the missing + ``response.completed`` postlude — fall back to a non-stream + ``responses.create(stream=True)`` which surfaces the real provider + error. Also closes #8133 (``response.in_progress`` prelude on custom + relays) and #14634 (``codex.rate_limits`` prelude on codex-lb). + +2. 
The HTTP 403 entitlement error xAI returns when an OAuth token lacks + SuperGrok / X Premium ("You have either run out of available + resources or do not have an active Grok subscription") used to read + as a confusing wall of JSON. ``_summarize_api_error`` now appends a + one-line hint pointing the user at https://grok.com and ``/model``. + +3. Multi-turn replay of ``codex_reasoning_items`` (with + ``encrypted_content``) is now suppressed for ``is_xai_responses=True`` + in ``_chat_messages_to_responses_input``. xAI's OAuth/SuperGrok + surface rejects replayed encrypted reasoning items; Grok still + reasons natively each turn, so coherence rides on visible message + text. +""" + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fix A: prelude error fallback +# --------------------------------------------------------------------------- + + +def _make_codex_agent(): + """Build a minimal AIAgent wired for codex_responses streaming tests.""" + from run_agent import AIAgent + + agent = AIAgent( + api_key="test-key", + base_url="https://api.x.ai/v1", + model="grok-4.3", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent.api_mode = "codex_responses" + agent.provider = "xai-oauth" + agent._interrupt_requested = False + return agent + + +@pytest.mark.parametrize( + "prelude_event_type", + [ + "error", # xAI OAuth multi-turn + "codex.rate_limits", # codex-lb relays (#14634) + "response.in_progress", # custom Responses relays (#8133) + ], +) +def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type): + """The SDK's prelude RuntimeError must trigger the non-stream fallback. + + When the first SSE event isn't ``response.created``, openai-python + raises RuntimeError before our event loop sees anything. 
We must + detect that, retry once, then fall back to ``create(stream=True)`` + which surfaces the real provider error or a real response. + """ + agent = _make_codex_agent() + + prelude_error = RuntimeError( + f"Expected to have received `response.created` before `{prelude_event_type}`" + ) + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = prelude_error + + fallback_response = SimpleNamespace( + output=[SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text="fallback ok")], + )], + status="completed", + ) + + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + result = agent._run_codex_stream({}, client=mock_client) + + assert result is fallback_response + mock_fallback.assert_called_once_with({}, client=mock_client) + + +def test_codex_stream_prelude_error_retries_once_before_fallback(): + """The retry path must fire one extra stream attempt before falling back.""" + agent = _make_codex_agent() + + call_count = {"n": 0} + + def stream_side_effect(**kwargs): + call_count["n"] += 1 + raise RuntimeError( + "Expected to have received `response.created` before `error`" + ) + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = stream_side_effect + + fallback_response = SimpleNamespace(output=[], status="completed") + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + agent._run_codex_stream({}, client=mock_client) + + # max_stream_retries=1 → one retry + final attempt → 2 stream calls, + # THEN the fallback path runs. 
+ assert call_count["n"] == 2 + mock_fallback.assert_called_once() + + +def test_codex_stream_unrelated_runtimeerror_still_raises(): + """RuntimeErrors that aren't prelude/postlude shape must propagate.""" + agent = _make_codex_agent() + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = RuntimeError("something else broke") + + with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback: + with pytest.raises(RuntimeError, match="something else broke"): + agent._run_codex_stream({}, client=mock_client) + + mock_fallback.assert_not_called() + + +def test_codex_stream_postlude_error_still_falls_back(): + """Existing ``response.completed`` fallback must not regress.""" + agent = _make_codex_agent() + + mock_client = MagicMock() + mock_client.responses.stream.side_effect = RuntimeError( + "Didn't receive a `response.completed` event." + ) + + fallback_response = SimpleNamespace(output=[], status="completed") + with patch.object( + agent, "_run_codex_create_stream_fallback", return_value=fallback_response + ) as mock_fallback: + result = agent._run_codex_stream({}, client=mock_client) + + assert result is fallback_response + mock_fallback.assert_called_once() + + +# --------------------------------------------------------------------------- +# Fix B: friendly entitlement message +# --------------------------------------------------------------------------- + + +def test_summarize_api_error_decorates_xai_entitlement_403(): + """xAI's OAuth 403 must end with the subscribe-or-switch hint.""" + from run_agent import AIAgent + + error = RuntimeError( + "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission " + "to execute the specified operation', 'error': 'You have either run out of " + "available resources or do not have an active Grok subscription. 
Manage " + "subscriptions at https://grok.com'}" + ) + summary = AIAgent._summarize_api_error(error) + assert "do not have an active Grok subscription" in summary + assert "SuperGrok" in summary + assert "/model" in summary + assert "https://grok.com" in summary + + +def test_summarize_api_error_decorates_xai_body_message(): + """SDK-style error with structured body must also get the hint.""" + from run_agent import AIAgent + + class _XaiErr(Exception): + status_code = 403 + body = { + "error": { + "message": ( + "You have either run out of available resources or do " + "not have an active Grok subscription. Manage at " + "https://grok.com" + ) + } + } + + summary = AIAgent._summarize_api_error(_XaiErr("403")) + assert "HTTP 403" in summary + assert "SuperGrok / X Premium" in summary + + +def test_summarize_api_error_idempotent_for_entitlement_hint(): + """Decorating twice must not double up the hint.""" + from run_agent import AIAgent + + raw = "HTTP 403: do not have an active Grok subscription" + once = AIAgent._decorate_xai_entitlement_error(raw) + twice = AIAgent._decorate_xai_entitlement_error(once) + assert once == twice + + +def test_summarize_api_error_passes_through_unrelated_errors(): + """Non-xAI / non-entitlement errors must not be touched.""" + from run_agent import AIAgent + + error = RuntimeError("HTTP 500: upstream is sad") + summary = AIAgent._summarize_api_error(error) + assert "SuperGrok" not in summary + assert "grok.com" not in summary + assert "upstream is sad" in summary + + +# --------------------------------------------------------------------------- +# Fix C: reasoning replay gating for xai-oauth +# --------------------------------------------------------------------------- + + +def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"): + return { + "role": "assistant", + "content": text, + "codex_reasoning_items": [ + { + "type": "reasoning", + "id": "rs_xai_001", + "encrypted_content": encrypted, + "summary": 
[], + } + ], + } + + +def test_codex_reasoning_replay_default_includes_encrypted_content(): + """Native Codex backend (default) must still replay encrypted reasoning.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + + msgs = [ + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(), + {"role": "user", "content": "what's your name?"}, + ] + + items = _chat_messages_to_responses_input(msgs) + reasoning = [it for it in items if it.get("type") == "reasoning"] + assert len(reasoning) == 1 + assert reasoning[0]["encrypted_content"] == "enc_blob" + + +def test_codex_reasoning_replay_stripped_for_xai_oauth(): + """xAI OAuth surface must NOT receive replayed encrypted reasoning.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + + msgs = [ + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(), + {"role": "user", "content": "what's your name?"}, + ] + + items = _chat_messages_to_responses_input(msgs, is_xai_responses=True) + reasoning = [it for it in items if it.get("type") == "reasoning"] + assert reasoning == [] + + # The assistant's visible text must still survive — coherence across + # turns rides on the message text alone. 
+ assistant_items = [ + it for it in items + if it.get("role") == "assistant" or it.get("type") == "message" + ] + assert assistant_items, "assistant message must still be present" + + +def test_codex_transport_xai_request_omits_encrypted_content_include(): + """Verify the xAI ``include`` array no longer requests encrypted reasoning.""" + from agent.transports.codex import ResponsesApiTransport + + transport = ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="grok-4.3", + messages=[ + {"role": "system", "content": "you are a helpful assistant"}, + {"role": "user", "content": "hi"}, + ], + tools=None, + instructions="you are a helpful assistant", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=True, + ) + # Without this gate, xAI would echo back encrypted_content blobs we'd + # then store in codex_reasoning_items and replay next turn — which is + # exactly the multi-turn failure mode we're closing. + assert kwargs["include"] == [] + + +def test_codex_transport_xai_strips_replayed_reasoning_in_input(): + """End-to-end: build_kwargs on xai-oauth must strip prior reasoning.""" + from agent.transports.codex import ResponsesApiTransport + + transport = ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="grok-4.3", + messages=[ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(text="hi from grok"), + {"role": "user", "content": "what's your name?"}, + ], + tools=None, + instructions="sys", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=True, + ) + input_items = kwargs["input"] + reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] + assert reasoning_items == [] + + +def test_codex_transport_native_codex_still_replays_reasoning_in_input(): + """Regression guard: openai-codex must keep the existing replay path.""" + from agent.transports.codex import ResponsesApiTransport + + transport = 
ResponsesApiTransport() + kwargs = transport.build_kwargs( + model="gpt-5-codex", + messages=[ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + _assistant_msg_with_encrypted_reasoning(text="hi from codex"), + {"role": "user", "content": "next"}, + ], + tools=None, + instructions="sys", + reasoning_config={"enabled": True, "effort": "medium"}, + is_xai_responses=False, + ) + input_items = kwargs["input"] + reasoning_items = [it for it in input_items if it.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "enc_blob" + # Native Codex still asks for encrypted_content back. + assert "reasoning.encrypted_content" in kwargs.get("include", []) From 068c24f8a4203e86de32b0d84ccaf047e8cd6ef7 Mon Sep 17 00:00:00 2001 From: twebefy Date: Sat, 25 Apr 2026 00:46:10 +0800 Subject: [PATCH 090/218] feat(deepseek): add thinking.type + reasoning_effort mapping for DeepSeek API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek's thinking mode requires both: - extra_body.thinking.type: "enabled" to activate thinking mode - top-level reasoning_effort: "max" or "high" to control depth Previously, the ChatCompletionsTransport only handled Kimi's thinking mode — DeepSeek was left unmapped, so reasoning_effort config was silently dropped. This patch: 1. Adds is_deepseek: bool to the Params dataclass, detected by base_url matching api.deepseek.com 2. Maps Hermes effort levels (xhigh/max → "max", low/medium/high → themselves) to the top-level reasoning_effort parameter 3. Sets extra_body.thinking.type alongside the effort 4. 
Strips reasoning_content from assistant messages sent back to DeepSeek, preventing 400 errors when thinking was enabled --- agent/transports/chat_completions.py | 20 ++++++++++++++++++++ run_agent.py | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 7edb69e42c7..1ae584e9159 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -189,6 +189,7 @@ class ChatCompletionsTransport(ProviderTransport): is_kimi: bool is_tokenhub: bool is_lmstudio: bool + is_deepseek: bool is_custom_provider: bool ollama_num_ctx: int | None # Provider routing @@ -348,6 +349,25 @@ class ChatCompletionsTransport(ProviderTransport): "type": "enabled" if _kimi_thinking_enabled else "disabled", } + # DeepSeek extra_body.thinking + top-level reasoning_effort + is_deepseek = params.get("is_deepseek", False) + if is_deepseek: + _ds_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _ds_thinking_enabled = False + extra_body["thinking"] = { + "type": "enabled" if _ds_thinking_enabled else "disabled", + } + # DeepSeek effort: low/medium→high, high→high, xhigh/max→max + if _ds_thinking_enabled and reasoning_config: + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("xhigh", "max"): + api_kwargs["reasoning_effort"] = "max" + elif _e in ("low", "medium", "high"): + api_kwargs["reasoning_effort"] = _e + # If no effort configured, don't set it → DeepSeek defaults to high + # Reasoning. LM Studio is handled above via top-level reasoning_effort, # so skip emitting extra_body.reasoning for it. 
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False): diff --git a/run_agent.py b/run_agent.py index 2b20d48ede2..c9aa3157170 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9798,6 +9798,7 @@ class AIAgent: ) _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com") _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio" + _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com") # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE # sentinel (temperature omitted entirely), a numeric override, or None. @@ -9909,6 +9910,7 @@ class AIAgent: is_kimi=_is_kimi, is_tokenhub=_is_tokenhub, is_lmstudio=_is_lmstudio, + is_deepseek=_is_deepseek, is_custom_provider=self.provider == "custom", ollama_num_ctx=self._ollama_num_ctx, provider_preferences=_prefs or None, @@ -10368,6 +10370,11 @@ class AIAgent: # context compaction). Don't pass null to the API. api_msg.pop("reasoning_content", None) + # DeepSeek: strip reasoning_content on all assistant messages so the API + # doesn't return 400 when the model was invoked with thinking enabled. + if base_url_host_matches(self.base_url, "api.deepseek.com"): + api_msg.pop("reasoning_content", None) + @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. 
From cd9470f41638bd515db096cd934c463205790110 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 16:39:18 -0700 Subject: [PATCH 091/218] fix(deepseek): wire thinking-mode via DeepSeekProfile, not legacy fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cherry-picked PR #15251 from @tw2818 correctly identified the DeepSeek 400 root cause but placed the fix in the legacy fallback path of `build_kwargs`, which DeepSeek never reaches — DeepSeek has a registered ProviderProfile and goes through `_build_kwargs_from_profile` instead. The legacy-path block was therefore dead code. This commit pivots the fix to where it actually fires: - New `DeepSeekProfile` in `plugins/model-providers/deepseek/__init__.py` overrides `build_api_kwargs_extras` to emit DeepSeek's expected wire format (mirrors `KimiProfile`): {"reasoning_effort": "<low|medium|high|max>", "extra_body": {"thinking": {"type": "enabled" | "disabled"}}} - Model gating: only `deepseek-v4-*` and `deepseek-reasoner` emit thinking control. `deepseek-chat` (V3) is untouched — current behavior. - Effort mapping: low/medium/high passthrough, xhigh/max → max, unset → omitted (DeepSeek server applies its own default). - Revert the legacy-path additions from PR #15251 — they were dead code, and the `_copy_reasoning_content_for_api` strip block specifically would have nullified the existing reasoning_content padding machinery (`_needs_deepseek_tool_reasoning` → space-pad on replay) that the active provider already relies on for replay correctness. - Unit tests pin the wire-shape contract and the model gating rules (26 tests, all passing). Existing transport + provider profile suites (321 tests) continue to pass. - AUTHOR_MAP: map twebefy@gmail.com → tw2818 for release notes credit. Closes #15700, #17212, #17825.
Co-authored-by: tw2818 <twebefy@gmail.com> --- agent/transports/chat_completions.py | 20 -- plugins/model-providers/deepseek/__init__.py | 83 +++++++- run_agent.py | 7 - scripts/release.py | 1 + .../model_providers/test_deepseek_profile.py | 184 ++++++++++++++++++ 5 files changed, 266 insertions(+), 29 deletions(-) create mode 100644 tests/plugins/model_providers/test_deepseek_profile.py diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 1ae584e9159..7edb69e42c7 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -189,7 +189,6 @@ class ChatCompletionsTransport(ProviderTransport): is_kimi: bool is_tokenhub: bool is_lmstudio: bool - is_deepseek: bool is_custom_provider: bool ollama_num_ctx: int | None # Provider routing @@ -349,25 +348,6 @@ class ChatCompletionsTransport(ProviderTransport): "type": "enabled" if _kimi_thinking_enabled else "disabled", } - # DeepSeek extra_body.thinking + top-level reasoning_effort - is_deepseek = params.get("is_deepseek", False) - if is_deepseek: - _ds_thinking_enabled = True - if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is False: - _ds_thinking_enabled = False - extra_body["thinking"] = { - "type": "enabled" if _ds_thinking_enabled else "disabled", - } - # DeepSeek effort: low/medium→high, high→high, xhigh/max→max - if _ds_thinking_enabled and reasoning_config: - _e = (reasoning_config.get("effort") or "").strip().lower() - if _e in ("xhigh", "max"): - api_kwargs["reasoning_effort"] = "max" - elif _e in ("low", "medium", "high"): - api_kwargs["reasoning_effort"] = _e - # If no effort configured, don't set it → DeepSeek defaults to high - # Reasoning. LM Studio is handled above via top-level reasoning_effort, # so skip emitting extra_body.reasoning for it.
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False): diff --git a/plugins/model-providers/deepseek/__init__.py b/plugins/model-providers/deepseek/__init__.py index 59d738f50fb..f67146df113 100644 --- a/plugins/model-providers/deepseek/__init__.py +++ b/plugins/model-providers/deepseek/__init__.py @@ -1,9 +1,88 @@ -"""DeepSeek provider profile.""" +"""DeepSeek provider profile. + +DeepSeek's V4 family (and the legacy ``deepseek-reasoner``) defaults to +thinking-mode ON when ``extra_body.thinking`` is unset. The API then returns +``reasoning_content`` and starts enforcing the contract that subsequent turns +echo it back; combined with how Hermes replays history this lands on the +notorious HTTP 400 ``reasoning_content must be passed back`` error after the +first tool call (#15700, #17212, #17825). + +This profile overrides :meth:`build_api_kwargs_extras` to mirror the Kimi / +Moonshot wire shape that DeepSeek's OpenAI-compat endpoint expects: + + {"reasoning_effort": "<low|medium|high|max>", + "extra_body": {"thinking": {"type": "enabled" | "disabled"}}} + +Non-thinking models (only ``deepseek-chat`` today, which is V3) are left as +no-ops so we don't perturb the V3 wire format. +""" + +from __future__ import annotations + +from typing import Any from providers import register_provider from providers.base import ProviderProfile -deepseek = ProviderProfile( + +def _model_supports_thinking(model: str | None) -> bool: + """DeepSeek thinking-capable model families. + + Currently covers the V4 family (``deepseek-v4-pro``, ``deepseek-v4-flash``, + and any future ``deepseek-v4-*`` variants) and the legacy + ``deepseek-reasoner`` (R1). ``deepseek-chat`` is V3 with no thinking mode. + """ + m = (model or "").strip().lower() + if not m: + return False + if m.startswith("deepseek-v") and not m.startswith("deepseek-v3"): + # deepseek-v4-*, deepseek-v5-*, etc. — every V4+ generation has + # thinking. v3 explicitly excluded.
+ return True + if m == "deepseek-reasoner": + return True + return False + + +class DeepSeekProfile(ProviderProfile): + """DeepSeek — extra_body.thinking + top-level reasoning_effort.""" + + def build_api_kwargs_extras( + self, *, reasoning_config: dict | None = None, model: str | None = None, **context + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + top_level: dict[str, Any] = {} + + if not _model_supports_thinking(model): + # V3 / unknown — leave wire format untouched, current behavior. + return extra_body, top_level + + # Determine enabled/disabled. Default is enabled to match DeepSeek's + # API default; the API requires this to be set explicitly to avoid the + # reasoning_content echo trap on subsequent turns. + enabled = True + if isinstance(reasoning_config, dict) and reasoning_config.get("enabled") is False: + enabled = False + + extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"} + + if not enabled: + return extra_body, top_level + + # Effort mapping. Pass low/medium/high through; xhigh/max → max. + # When no effort is set we omit reasoning_effort so DeepSeek applies + # its server default (currently high). 
+ if isinstance(reasoning_config, dict): + effort = (reasoning_config.get("effort") or "").strip().lower() + if effort in ("xhigh", "max"): + top_level["reasoning_effort"] = "max" + elif effort in ("low", "medium", "high"): + top_level["reasoning_effort"] = effort + + return extra_body, top_level + + +deepseek = DeepSeekProfile( name="deepseek", aliases=("deepseek-chat",), env_vars=("DEEPSEEK_API_KEY",), diff --git a/run_agent.py b/run_agent.py index c9aa3157170..2b20d48ede2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9798,7 +9798,6 @@ class AIAgent: ) _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com") _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio" - _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com") # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE # sentinel (temperature omitted entirely), a numeric override, or None. @@ -9910,7 +9909,6 @@ class AIAgent: is_kimi=_is_kimi, is_tokenhub=_is_tokenhub, is_lmstudio=_is_lmstudio, - is_deepseek=_is_deepseek, is_custom_provider=self.provider == "custom", ollama_num_ctx=self._ollama_num_ctx, provider_preferences=_prefs or None, @@ -10370,11 +10368,6 @@ class AIAgent: # context compaction). Don't pass null to the API. api_msg.pop("reasoning_content", None) - # DeepSeek: strip reasoning_content on all assistant messages so the API - # doesn't return 400 when the model was invoked with thinking enabled. - if base_url_host_matches(self.base_url, "api.deepseek.com"): - api_msg.pop("reasoning_content", None) - @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. 
diff --git a/scripts/release.py b/scripts/release.py index aafa626329e..6084e0754c0 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -73,6 +73,7 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "piyushvp1@gmail.com": "thelumiereguy", "421774554@qq.com": "wuli666", + "twebefy@gmail.com": "tw2818", "harish.kukreja@gmail.com": "counterposition", "korkyzer@gmail.com": "Korkyzer", "1046611633@qq.com": "zhengyn0001", diff --git a/tests/plugins/model_providers/test_deepseek_profile.py b/tests/plugins/model_providers/test_deepseek_profile.py new file mode 100644 index 00000000000..c53e70070a8 --- /dev/null +++ b/tests/plugins/model_providers/test_deepseek_profile.py @@ -0,0 +1,184 @@ +"""Unit tests for the DeepSeek provider profile's thinking-mode wiring. + +DeepSeek V4 (and the legacy ``deepseek-reasoner``) expects every request to +carry an explicit ``extra_body.thinking`` parameter. Omitting it makes the +server default to thinking-mode ON, which then enforces the +``reasoning_content``-must-be-echoed-back contract on subsequent turns and +breaks the conversation with HTTP 400 (#15700, #17212, #17825). + +These tests pin the profile's wire-shape contract so DeepSeek requests stay +correctly shaped without going live. +""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture +def deepseek_profile(): + """Resolve the registered DeepSeek profile. + + Going through ``providers.get_provider_profile`` keeps the test honest — + if someone later replaces the registered class with a plain + ``ProviderProfile``, every assertion below collapses. + """ + # ``model_tools`` triggers plugin discovery on import, which is what + # registers the DeepSeek profile in the global provider registry. 
+ import model_tools # noqa: F401 + import providers + + profile = providers.get_provider_profile("deepseek") + assert profile is not None, "deepseek provider profile must be registered" + return profile + + +class TestDeepSeekThinkingWireShape: + """``build_api_kwargs_extras`` produces DeepSeek's exact wire format.""" + + def test_v4_pro_default_enables_thinking_without_effort(self, deepseek_profile): + """No reasoning_config → thinking enabled, server picks default effort.""" + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config=None, model="deepseek-v4-pro" + ) + assert extra_body == {"thinking": {"type": "enabled"}} + assert top_level == {} + + def test_v4_pro_enabled_with_high_effort(self, deepseek_profile): + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, + model="deepseek-v4-pro", + ) + assert extra_body == {"thinking": {"type": "enabled"}} + assert top_level == {"reasoning_effort": "high"} + + @pytest.mark.parametrize("effort", ["low", "medium", "high"]) + def test_standard_efforts_pass_through(self, deepseek_profile, effort): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, + model="deepseek-v4-pro", + ) + assert top_level == {"reasoning_effort": effort} + + @pytest.mark.parametrize("effort", ["xhigh", "max", "MAX", " Max "]) + def test_xhigh_and_max_normalize_to_max(self, deepseek_profile, effort): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, + model="deepseek-v4-pro", + ) + assert top_level == {"reasoning_effort": "max"} + + def test_explicitly_disabled_sends_disabled_marker(self, deepseek_profile): + """``reasoning_config.enabled=False`` → ``thinking.type=disabled``. + + The crucial bit is that the parameter is *sent* at all — DeepSeek + defaults to thinking-on when ``thinking`` is absent. 
+ """ + extra_body, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False}, model="deepseek-v4-pro" + ) + assert extra_body == {"thinking": {"type": "disabled"}} + # No effort when disabled — DeepSeek rejects it. + assert top_level == {} + + def test_disabled_ignores_effort_field(self, deepseek_profile): + """Effort silently dropped when thinking is off.""" + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False, "effort": "high"}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + def test_unknown_effort_omits_top_level(self, deepseek_profile): + """Garbage effort → omit reasoning_effort so DeepSeek applies its default.""" + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "garbage"}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + def test_empty_effort_omits_top_level(self, deepseek_profile): + _, top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": ""}, + model="deepseek-v4-pro", + ) + assert top_level == {} + + +class TestDeepSeekModelGating: + """V4 family + ``deepseek-reasoner`` get thinking; V3 stays untouched.""" + + @pytest.mark.parametrize( + "model", + [ + "deepseek-v4-pro", + "deepseek-v4-flash", + "deepseek-v4-future-variant", + "deepseek-reasoner", + "DEEPSEEK-V4-PRO", # case-insensitive + ], + ) + def test_thinking_capable_models_emit_thinking(self, deepseek_profile, model): + extra_body, _ = deepseek_profile.build_api_kwargs_extras( + reasoning_config=None, model=model + ) + assert extra_body == {"thinking": {"type": "enabled"}} + + @pytest.mark.parametrize( + "model", + [ + "deepseek-chat", # V3 alias + "deepseek-v3-0324", # explicit V3 + "deepseek-v3.1", # V3 minor revisions + "", # bare/unknown + None, # missing + "deepseek-unknown", # unrecognized + ], + ) + def test_non_thinking_models_emit_nothing(self, deepseek_profile, model): + extra_body, 
top_level = deepseek_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, model=model + ) + assert extra_body == {} + assert top_level == {} + + +class TestDeepSeekFullKwargsIntegration: + """End-to-end: the transport's full kwargs match DeepSeek's live wire format. + + The live test harness in ``tests/run_agent/test_deepseek_v4_thinking_live.py`` + sends ``{"reasoning_effort": "high", "extra_body": {"thinking": {"type": + "enabled"}}}``. Confirm the transport produces that exact shape when wired + through the registered DeepSeek profile. + """ + + def test_full_kwargs_match_live_wire_shape(self, deepseek_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="deepseek-v4-pro", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=deepseek_profile, + reasoning_config={"enabled": True, "effort": "high"}, + base_url="https://api.deepseek.com/v1", + provider_name="deepseek", + ) + assert kwargs["model"] == "deepseek-v4-pro" + assert kwargs["reasoning_effort"] == "high" + assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}} + + def test_v3_chat_full_kwargs_omit_thinking(self, deepseek_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="deepseek-chat", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=deepseek_profile, + reasoning_config={"enabled": True, "effort": "high"}, + base_url="https://api.deepseek.com/v1", + provider_name="deepseek", + ) + assert "reasoning_effort" not in kwargs + assert "extra_body" not in kwargs or "thinking" not in kwargs.get("extra_body", {}) From dc4cde278ba0523c01c2c29988e59a567a19ef22 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 17:04:30 -0700 Subject: [PATCH 092/218] feat(docs): show 
per-skill pages in the left sidebar (#26646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Individual skill pages (e.g. /docs/user-guide/skills/bundled/productivity/notion) had no sidebar rendered — the sidebar config only listed the two catalog index pages. That was an intentional choice from an earlier 'too many entries would drown product docs' concern, but the effect is that a user landing on any skill page (via search, share link, or the catalog table) loses navigation entirely and can't see related skills. Wire build_sidebar_items() (which was already computed and discarded) back into the sidebar. Structure: Skills ├── Bundled skills catalog (catalog table, was already there) ├── Optional skills catalog (catalog table, was already there) ├── Bundled │ ├── apple/ │ │ ├── apple-apple-notes │ │ └── ... │ └── ... (one collapsed category per skill category) └── Optional └── ... (same) Categories are collapsed by default so the top-level Skills entry doesn't explode visually. Users browsing one skill see siblings in the same category; the catalogs remain the at-a-glance entry point. Also includes drift the regen script naturally produces on top of current main: - creative-comfyui v5.0.0 → v5.1.0 page (author + new ref file) - devops-kanban-worker SKILL.md updates - new pages for optional skills that lacked generated docs: hyperliquid, finance-stocks, software-development/rest-graphql-debug - updated optional-skills-catalog row for those Validation: - npx docusaurus build (en locale) succeeded — only pre-existing warnings - inspected built productivity-notion/index.html: sidebar tree present, sibling productivity skills (airtable, linear, etc.) 
all linked --- .../docs/reference/optional-skills-catalog.md | 8 + .../bundled/creative/creative-comfyui.md | 10 +- .../bundled/devops/devops-kanban-worker.md | 23 + .../blockchain/blockchain-hyperliquid.md | 228 ++++++++ .../skills/optional/finance/finance-stocks.md | 112 ++++ ...software-development-rest-graphql-debug.md | 531 ++++++++++++++++++ website/scripts/generate-skill-docs.py | 94 +++- website/sidebars.ts | 438 +++++++++++++++ 8 files changed, 1411 insertions(+), 33 deletions(-) create mode 100644 website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md create mode 100644 website/docs/user-guide/skills/optional/finance/finance-stocks.md create mode 100644 website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 8c4c2f36432..d5839f846d1 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -39,6 +39,7 @@ hermes skills uninstall | Skill | Description | |-------|-------------| | [**evm**](/docs/user-guide/skills/optional/blockchain/blockchain-evm) | Read-only EVM client: wallets, tokens, gas across 8 chains. | +| [**hyperliquid**](/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid) | Hyperliquid market data, account history, trade review. | | [**solana**](/docs/user-guide/skills/optional/blockchain/blockchain-solana) | Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required. | ## communication @@ -88,6 +89,7 @@ hermes skills uninstall | [**lbo-model**](/docs/user-guide/skills/optional/finance/finance-lbo-model) | Build leveraged buyout models in Excel — sources & uses, debt schedule, cash sweep, exit multiple, IRR/MOIC sensitivity. 
Pairs with excel-author. Use for PE screening, sponsor-case valuation, or illustrative LBO in a pitch. | | [**merger-model**](/docs/user-guide/skills/optional/finance/finance-merger-model) | Build accretion/dilution (merger) models in Excel — pro-forma P&L, synergies, financing mix, EPS impact. Pairs with excel-author. Use for M&A pitches, board materials, or deal evaluation. | | [**pptx-author**](/docs/user-guide/skills/optional/finance/finance-pptx-author) | Build PowerPoint decks headless with python-pptx. Pairs with excel-author for model-backed decks where every number traces to a workbook cell. Use for pitch decks, IC memos, earnings notes. | +| [**stocks**](/docs/user-guide/skills/optional/finance/finance-stocks) | Stock quotes, history, search, compare, crypto via Yahoo. | ## health @@ -176,6 +178,12 @@ hermes skills uninstall | [**oss-forensics**](/docs/user-guide/skills/optional/security/security-oss-forensics) | Supply chain investigation, evidence recovery, and forensic analysis for GitHub repositories. Covers deleted commit recovery, force-push detection, IOC extraction, multi-source evidence collection, hypothesis formation/validation, and st... | | [**sherlock**](/docs/user-guide/skills/optional/security/security-sherlock) | OSINT username search across 400+ social networks. Hunt down social media accounts by username. | +## software-development + +| Skill | Description | +|-------|-------------| +| [**rest-graphql-debug**](/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug) | Debug REST/GraphQL APIs: status codes, auth, schemas, repro. 
| + ## web-development | Skill | Description | diff --git a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md index 7877e174c7a..38610be8b83 100644 --- a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md +++ b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md @@ -16,8 +16,8 @@ Generate images, video, and audio with ComfyUI — install, launch, manage nodes |---|---| | Source | Bundled (installed by default) | | Path | `skills/creative/comfyui` | -| Version | `5.0.0` | -| Author | ['kshitijk4poor', 'alt-glitch'] | +| Version | `5.1.0` | +| Author | ['kshitijk4poor', 'alt-glitch', 'purzbeats'] | | License | MIT | | Platforms | macos, linux, windows | | Tags | `comfyui`, `image-generation`, `stable-diffusion`, `flux`, `sd3`, `wan-video`, `hunyuan-video`, `creative`, `generative-ai`, `video-generation` | @@ -42,6 +42,12 @@ for workflow execution. - `official-cli.md` — every `comfy ...` command, with flags - `rest-api.md` — REST + WebSocket endpoints (local + cloud), payload schemas - `workflow-format.md` — API-format JSON, common node types, param mapping +- `template-integrity.md` — converting `comfyui-workflow-templates` from + editor format to API format: Reroute bypass, dotted dynamic-input keys + (`values.a`, `resize_type.width`), Cloud quirks (302 redirect, 1 concurrent + free-tier job, 1080p VRAM ceiling), Discord-compatible ffmpeg stitch. + Authored by [@purzbeats](https://github.com/purzbeats). Load this whenever + you're starting from an official template. 
**Scripts (`scripts/`):** diff --git a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md index dac9de9f174..28d51c17887 100644 --- a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md +++ b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md @@ -65,6 +65,29 @@ kanban_complete( ) ``` +**Coding task that needs human review (review-required):** + +For most code-changing tasks, the work isn't truly *done* until a human reviewer has eyes on it. Block instead of complete, with `reason` prefixed `review-required: ` so the dashboard surfaces the row as needing review. Drop the structured metadata (changed files, test counts, diff/PR url) into a comment first, since `kanban_block` only carries the human-readable reason — comments are the durable annotation channel. Reviewer either approves and runs `hermes kanban unblock <id>` (which re-spawns you with the comment thread for any follow-ups) or asks for changes via another comment. + +```python +import json + +kanban_comment( + body="review-required handoff:\n" + json.dumps({ + "changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"], + "tests_run": 14, + "tests_passed": 14, + "diff_path": "/path/to/worktree", # or PR url if pushed + "decisions": ["user_id primary, IP fallback for unauthenticated requests"], + }, indent=2), +) +kanban_block( + reason="review-required: rate limiter shipped, 14/14 tests pass — needs eyes on the user_id/IP fallback choice before merging", +) +``` + +Use `kanban_complete` only when the task is genuinely terminal — e.g. a one-line typo fix, a docs change with no functional consequences, or a research task where the artifact IS the writeup itself.
+ **Research task:** ```python kanban_complete( diff --git a/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md b/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md new file mode 100644 index 00000000000..8651bc979f6 --- /dev/null +++ b/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md @@ -0,0 +1,228 @@ +--- +title: "Hyperliquid — Hyperliquid market data, account history, trade review" +sidebar_label: "Hyperliquid" +description: "Hyperliquid market data, account history, trade review" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Hyperliquid + +Hyperliquid market data, account history, trade review. + +## Skill metadata + +| | | +|---|---| +| Source | Optional — install with `hermes skills install official/blockchain/hyperliquid` | +| Path | `optional-skills/blockchain/hyperliquid` | +| Version | `0.1.0` | +| Author | Hugo Sequier (Hugo-SEQUIER), Hermes Agent | +| License | MIT | +| Platforms | linux, macos, windows | +| Tags | `Hyperliquid`, `Blockchain`, `Crypto`, `Trading`, `Perpetuals`, `Spot`, `DeFi` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Hyperliquid Skill + +Query Hyperliquid market and account data through the public `/info` endpoint. +Read-only — no API key, no signing, no order placement. + +12 commands: `dexs`, `markets`, `spots`, `candles`, `funding`, `l2`, `state`, +`spot-balances`, `fills`, `orders`, `review`, `export`. Stdlib only +(`urllib`, `json`, `argparse`). 
+ +--- + +## When to Use + +- User asks for Hyperliquid perp or spot market data, candles, funding, or L2 book +- User wants to inspect a wallet's perp positions, spot balances, fills, or orders +- User wants a post-trade review combining recent fills with market context +- User wants to inspect builder-deployed perp dexs or HIP-3 markets +- User wants a normalized JSON export of candles + funding for backtesting prep + +--- + +## Prerequisites + +Stdlib only — no external packages, no API key. + +The script reads `~/.hermes/.env` for two optional defaults: + +- `HYPERLIQUID_API_URL` — defaults to `https://api.hyperliquid.xyz`. Set to + `https://api.hyperliquid-testnet.xyz` for testnet. +- `HYPERLIQUID_USER_ADDRESS` — default address for `state`, `spot-balances`, + `fills`, `orders`, and `review`. If unset, pass the address as the first + positional argument. + +A project `.env` in the current working directory is honored as a dev fallback. + +Helper script: `~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py` + +--- + +## How to Run + +Invoke through the `terminal` tool: + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py <command> [args] +``` + +Add `--json` to any command for machine-readable output.
+ +--- + +## Quick Reference + +```bash +hyperliquid_client.py dexs +hyperliquid_client.py markets [--dex DEX] [--limit N] [--sort volume|oi|funding_abs|change_abs|name] +hyperliquid_client.py spots [--limit N] +hyperliquid_client.py candles <coin> [--interval 1h] [--hours 24] [--limit N] +hyperliquid_client.py funding <coin> [--hours 72] [--limit N] +hyperliquid_client.py l2 <coin> [--levels N] +hyperliquid_client.py state [address] [--dex DEX] +hyperliquid_client.py spot-balances [address] [--limit N] +hyperliquid_client.py fills [address] [--hours N] [--limit N] [--aggregate-by-time] +hyperliquid_client.py orders [address] [--limit N] +hyperliquid_client.py review [address] [--coin COIN] [--hours N] [--fills N] +hyperliquid_client.py export <coin> [--interval 1h] [--hours N] [--output PATH] +``` + +For `state`, `spot-balances`, `fills`, `orders`, and `review`, the address is +optional when `HYPERLIQUID_USER_ADDRESS` is set in `~/.hermes/.env`. + +--- + +## Procedure + +### 1. Discover DEXs and Markets + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py dexs + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + markets --limit 15 --sort volume + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + spots --limit 15 +``` + +- `--dex` only applies to perp endpoints; omit for the first perp dex. +- Spot pairs may show as `PURR/USDC` or aliases like `@107`. +- HIP-3 markets prefix the coin with the dex, e.g. `mydex:BTC`. + +### 2. Pull Historical Market Data + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + candles BTC --interval 1h --hours 72 --limit 48 + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + funding BTC --hours 168 --limit 30 +``` + +Time-range endpoints paginate. For larger windows, repeat with a later +`startTime` or use `export` (below). + +### 3.
Inspect Live Order Book + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + l2 BTC --levels 10 +``` + +Use when asked about book depth, near-term liquidity, or potential market +impact of a large order. + +### 4. Review an Account + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + state 0xabc... + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + spot-balances +``` + +`state` returns perp positions; `spot-balances` returns spot inventory. +Use these for "how are my positions?", "what am I holding?", "how much is +withdrawable?". + +### 5. Review Fills and Orders + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + fills 0xabc... --hours 72 --limit 25 + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + orders --limit 25 +``` + +### 6. Generate a Trade Review + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + review 0xabc... --hours 72 --fills 50 + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + review --coin BTC --hours 168 +``` + +Reports realized PnL, fees, win/loss counts, coin breakdowns, market trend +and average funding for each traded perp, plus heuristics (fee drag, +concentration, counter-trend losses). + +For deeper post-trade analysis: start with `review` to find problem coins +or windows → pull `fills` and `orders` for that period → pull `candles` +and `funding` for each traded coin → judge decision quality separately +from outcome quality. + +### 7. 
Export a Reusable Dataset + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + export BTC --interval 1h --hours 168 --output ./btc-1h-7d.json + +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + export BTC --interval 15m --hours 72 --end-time-ms 1760000000000 +``` + +Output JSON contains: schema version, source metadata, exact time window, +normalized candle rows, normalized funding rows, summary stats. Use +`--end-time-ms` for reproducible windows. + +--- + +## Pitfalls + +- Public info endpoints are rate-limited. Large historical queries may + return capped windows; iterate with later `startTime` values. +- `fills --hours ...` uses `userFillsByTime`, which only exposes a + recent rolling window — not full archive history. +- `historicalOrders` returns recent orders only; not a full export. +- The `review` command is heuristic. It cannot reconstruct intent, + order placement quality, or true slippage from fills alone. +- The `export` command writes a normalized dataset, not a backtest + engine. You still need your own slippage/fill model. +- Spot aliases like `@107` are valid identifiers even when the UI shows + a friendlier name. +- `l2` is a point-in-time snapshot, not a time series. + +--- + +## Verification + +```bash +python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \ + markets --limit 5 +``` + +Should print the top Hyperliquid perp markets by 24h notional volume. 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-stocks.md b/website/docs/user-guide/skills/optional/finance/finance-stocks.md new file mode 100644 index 00000000000..7c43dea3065 --- /dev/null +++ b/website/docs/user-guide/skills/optional/finance/finance-stocks.md @@ -0,0 +1,112 @@ +--- +title: "Stocks — Stock quotes, history, search, compare, crypto via Yahoo" +sidebar_label: "Stocks" +description: "Stock quotes, history, search, compare, crypto via Yahoo" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Stocks + +Stock quotes, history, search, compare, crypto via Yahoo. + +## Skill metadata + +| | | +|---|---| +| Source | Optional — install with `hermes skills install official/finance/stocks` | +| Path | `optional-skills/finance/stocks` | +| Version | `0.1.0` | +| Author | Mibay (Mibayy), Hermes Agent | +| License | MIT | +| Platforms | linux, macos, windows | +| Tags | `Stocks`, `Finance`, `Market`, `Crypto`, `Investing` | +| Related skills | [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/docs/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Stocks Skill + +Read-only market data via Yahoo Finance. Five commands: `quote`, `search`, +`history`, `compare`, `crypto`. Python stdlib only — no API key, no pip +installs. Yahoo's endpoint is unofficial and may rate-limit or change. + +## When to Use + +- User asks for a current stock price (AAPL, TSLA, MSFT, ...) 
+- User wants to look up a ticker by company name +- User wants OHLCV history or performance over a date range +- User wants to compare several tickers side by side +- User asks for a crypto price (BTC, ETH, SOL, ...) + +## Prerequisites + +Python 3.8+ stdlib only. Optional: set `ALPHA_VANTAGE_KEY` to enrich +`market_cap`, `pe_ratio`, and 52-week levels when Yahoo's crumb-protected +fields come back null. Free key: https://www.alphavantage.co/support/#api-key + +## How to Run + +Invoke through the `terminal` tool. Once installed: + +``` +SCRIPT=~/.hermes/skills/finance/stocks/scripts/stocks_client.py +python3 $SCRIPT quote AAPL +``` + +All output is JSON on stdout — pipe through `jq` if you want to slice it. + +## Quick Reference + +``` +python3 $SCRIPT quote AAPL +python3 $SCRIPT quote AAPL MSFT GOOGL TSLA +python3 $SCRIPT search "Tesla" +python3 $SCRIPT history NVDA --range 6mo +python3 $SCRIPT compare AAPL MSFT GOOGL +python3 $SCRIPT crypto BTC ETH SOL +``` + +## Commands + +### `quote SYMBOL [SYMBOL2 ...]` + +Current price, change, change%, volume, 52-week high/low. + +### `search QUERY` + +Find tickers by company name. Returns top 5: symbol, name, exchange, type. + +### `history SYMBOL [--range RANGE]` + +Daily OHLCV plus stats (min, max, avg, total return %). Ranges: `1mo`, +`3mo`, `6mo`, `1y`, `5y`. Default: `1mo`. + +### `compare SYMBOL1 SYMBOL2 [...]` + +Side-by-side: price, change%, 52-week performance. + +### `crypto SYMBOL [SYMBOL2 ...]` + +Crypto prices. Pass `BTC` (the script appends `-USD` automatically). + +## Pitfalls + +- Yahoo Finance's API is unofficial. Endpoints can change or rate-limit + without notice — if requests start failing, that's why. +- `market_cap` and `pe_ratio` may return null on `quote` when Yahoo's + crumb session isn't established. Set `ALPHA_VANTAGE_KEY` to backfill. +- Add a small delay between bulk requests to avoid rate-limiting. +- This is read-only — no order placement, no account integration. 
+ +## Verification + +``` +python3 ~/.hermes/skills/finance/stocks/scripts/stocks_client.py quote AAPL +``` + +Returns a JSON object with `symbol: "AAPL"` and a numeric `price` field. diff --git a/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md new file mode 100644 index 00000000000..0698d855f5f --- /dev/null +++ b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md @@ -0,0 +1,531 @@ +--- +title: "Rest Graphql Debug — Debug REST/GraphQL APIs: status codes, auth, schemas, repro" +sidebar_label: "Rest Graphql Debug" +description: "Debug REST/GraphQL APIs: status codes, auth, schemas, repro" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Rest Graphql Debug + +Debug REST/GraphQL APIs: status codes, auth, schemas, repro. + +## Skill metadata + +| | | +|---|---| +| Source | Optional — install with `hermes skills install official/software-development/rest-graphql-debug` | +| Path | `optional-skills/software-development/rest-graphql-debug` | +| Version | `1.2.0` | +| Author | eren-karakus0 | +| License | MIT | +| Tags | `api`, `rest`, `graphql`, `http`, `debugging`, `testing`, `curl`, `integration` | +| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# API Testing & Debugging + +Drive REST and GraphQL diagnosis through Hermes tools — `terminal` for `curl`, `execute_code` for Python `requests`, `web_extract` for vendor docs. Isolate the failing layer before guessing at the fix. + +## When to Use + +- API returns unexpected status or body +- Auth fails (401/403 after token refresh, OAuth, API key) +- Works in Postman but fails in code +- Webhook / callback integration debugging +- Building or reviewing API integration tests +- Rate limiting or pagination issues + +Skip for UI rendering, DB query tuning, or DNS/firewall infra (escalate). + +## Core Principle + +**Isolate the layer, then fix.** A 200 OK can hide broken data. A 500 can mask a one-character auth typo. Walk the chain in order; never skip a step. + +``` +1. Connectivity → can we reach the host at all? +1.5 Timeouts → connect-slow vs read-slow? +2. TLS/SSL → cert valid and trusted? +3. Auth → credentials correct and unexpired? +4. Request format → payload shape match server expectations? +5. Response parse → does our code accept what came back? +6. Semantics → does the data mean what we assume? +``` + +## 5-Minute Quickstart + +### REST via terminal + +```python +# Verbose request/response exchange +terminal('curl -v https://api.example.com/users/1') + +# POST with JSON +terminal("""curl -X POST https://api.example.com/users \\ + -H 'Content-Type: application/json' \\ + -H "Authorization: Bearer $TOKEN" \\ + -d '{"name":"test","email":"test@example.com"}'""") + +# Headers only +terminal('curl -sI https://api.example.com/health') + +# Pretty-print JSON +terminal('curl -s https://api.example.com/users | python3 -m json.tool') +``` + +### GraphQL via terminal + +```python +terminal("""curl -X POST https://api.example.com/graphql \\ + -H 'Content-Type: application/json' \\ + -H "Authorization: Bearer $TOKEN" \\ + -d '{"query":"{ user(id: 1) { name email } }"}'""") +``` + +**GraphQL gotcha:** servers often return HTTP 200 even when the query failed. 
Always inspect the `errors` field regardless of status code: + +```python +execute_code(''' +import os, requests +resp = requests.post( + "https://api.example.com/graphql", + json={"query": "{ user(id: 1) { name email } }"}, + headers={"Authorization": f"Bearer {os.environ['TOKEN']}"}, + timeout=10, +) +data = resp.json() +if data.get("errors"): + for err in data["errors"]: + print(f"GraphQL error: {err['message']} (path: {err.get('path')})") +print(data.get("data")) +''') +``` + +### Python (requests) via execute_code + +```python +execute_code(''' +import requests +resp = requests.get( + "https://api.example.com/users/1", + headers={"Authorization": "Bearer <token>"}, + timeout=(3.05, 30), # (connect, read) +) +print(resp.status_code, dict(resp.headers)) +print(resp.text[:500]) +''') +``` + +## Layered Debug Flow + +### Step 1 — Connectivity + +```python +terminal('nslookup api.example.com') +terminal('curl -v --connect-timeout 5 https://api.example.com/health') +``` + +Failures: DNS not resolving, firewall, VPN required, proxy missing. + +### Step 1.5 — Timeouts + +Distinguish *can't reach* from *reaches but slow*: + +```python +terminal('''curl -w "dns:%{time_namelookup}s connect:%{time_connect}s tls:%{time_appconnect}s ttfb:%{time_starttransfer}s total:%{time_total}s\\n" \\ + -o /dev/null -s https://api.example.com/endpoint''') +``` + +In Python, always pass a tuple timeout — `requests` has no default and will hang forever: + +```python +execute_code(''' +import requests +from requests.exceptions import ConnectTimeout, ReadTimeout +try: + requests.get(url, timeout=(3.05, 30)) +except ConnectTimeout: + print("Cannot reach host — DNS, firewall, VPN") +except ReadTimeout: + print("Connected but server is slow") +''') +``` + +Diagnosis: high `time_connect` is network/firewall; high `time_starttransfer` with low `time_connect` is a slow server.
+ +### Step 2 — TLS/SSL + +```python +terminal('curl -vI https://api.example.com 2>&1 | grep -E "SSL|subject|expire|issuer"') +``` + +Failures: expired cert, self-signed, hostname mismatch, missing CA bundle. Use `-k` only for ad-hoc debug, never in code. + +### Step 3 — Authentication + +```python +# Token validity check +terminal('curl -s -o /dev/null -w "%{http_code}\\n" -H "Authorization: Bearer $TOKEN" https://api.example.com/me') + +# Decode JWT exp claim — handles base64url padding correctly +execute_code(''' +import json, base64, os +tok = os.environ["TOKEN"] +payload = tok.split(".")[1] +payload += "=" * (-len(payload) % 4) +print(json.dumps(json.loads(base64.urlsafe_b64decode(payload)), indent=2)) +''') +``` + +Checklist: +- Token expired? (`exp` claim in JWT) +- Right scheme? Bearer vs Basic vs Token vs `X-Api-Key` +- Right environment? Staging key on prod is a classic +- API key in header vs query param (`?api_key=…`)? + +### Step 4 — Request Format + +```python +terminal("""curl -v -X POST https://api.example.com/endpoint \\ + -H 'Content-Type: application/json' \\ + -d '{"key":"value"}' 2>&1""") +``` + +**Content-Type / body mismatch — the silent 415/400:** + +```python +# WRONG — data= sends form-encoded, header lies +requests.post(url, data='{"k":"v"}', headers={"Content-Type": "application/json"}) + +# RIGHT — json= auto-sets header AND serializes +requests.post(url, json={"k": "v"}) + +# WRONG — Accept says XML, code calls .json() +requests.get(url, headers={"Accept": "text/xml"}) + +# RIGHT — let requests build multipart with boundary +requests.post(url, files={"file": open("doc.pdf", "rb")}) +``` + +Common: form-encoded vs JSON, missing required fields, wrong HTTP method, unencoded query params. 
+ +### Step 5 — Response Parsing + +Always inspect content-type before calling `.json()`: + +```python +execute_code(''' +import requests +resp = requests.post(url, json=payload, timeout=10) +print(f"status={resp.status_code}") +print(f"headers={dict(resp.headers)}") +ct = resp.headers.get("Content-Type", "") +if "application/json" in ct: + print(resp.json()) +else: + print(f"unexpected content-type {ct!r}, body={resp.text[:500]!r}") +''') +``` + +Failures: HTML error page where JSON expected, empty body, wrong charset. + +### Step 6 — Semantic Validation + +Parsed cleanly — but is the data *correct*? + +- Does `"status": "active"` mean what your code thinks? +- ID in response matches the one requested? +- Timestamps in expected timezone? +- Pagination returning all results, or just page 1? + +## HTTP Status Playbook + +### 401 Unauthorized — credentials missing or invalid + +1. `Authorization` header actually present? (`curl -v` to confirm) +2. Token correct and unexpired? +3. Right auth scheme? (`Bearer` vs `Basic` vs `Token`) +4. Some APIs use query param (`?api_key=…`) instead of header. + +### 403 Forbidden — authenticated but not authorized + +1. Token has the required scopes/permissions? +2. Resource owned by a different account? +3. IP allowlist blocking you? +4. CORS in browser? (check `Access-Control-Allow-Origin`) + +### 404 Not Found — resource doesn't exist or URL is wrong + +1. Path correct? (trailing slash, typo, version prefix) +2. Resource ID exists? +3. Right API version (`/v1/` vs `/v2/`)? +4. Right base URL (staging vs prod)? + +### 409 Conflict — state collision + +1. Resource already exists (duplicate create)? +2. Stale `ETag` / `If-Match`? +3. Concurrent modification by another process? + +### 422 Unprocessable Entity — valid JSON, invalid data + +The error body usually names the bad fields. 
Check: +- Field types (string vs int, date format) +- Required vs optional +- Enum values inside the allowed set + +### 429 Too Many Requests — rate limited + +Check `Retry-After` and `X-RateLimit-*` headers. Exponential backoff: + +```python +execute_code(''' +import time, requests + +def with_backoff(method, url, **kwargs): + for attempt in range(5): + resp = requests.request(method, url, **kwargs) + if resp.status_code != 429: + return resp + wait = int(resp.headers.get("Retry-After", 2 ** attempt)) + time.sleep(wait) + return resp +''') +``` + +### 5xx — server-side, usually not your fault + +- **500** — server bug. Capture correlation ID, file with provider. +- **502** — upstream down. Backoff + retry. +- **503** — overloaded / maintenance. Check status page. +- **504** — upstream timeout. Reduce payload or raise timeout. + +For all 5xx: backoff with jitter, alert on persistence. + +## Pagination & Idempotency + +**Pagination.** Verify you're getting *all* results. Look for `next_cursor`, `next_page`, `total_count`. Two patterns: +- Offset (`?limit=100&offset=200`) — simple, can skip items if data shifts. +- Cursor (`?cursor=abc123`) — preferred for live or large datasets. + +**Idempotency.** For non-idempotent operations (POST), send `Idempotency-Key: <uuid>` so retries don't double-charge / double-create. Mandatory for payments and orders.
+ +## Contract Validation + +Catch schema drift before it hits production: + +```python +execute_code(''' +import requests + +def validate_user(data: dict) -> list[str]: + errors = [] + required = {"id": int, "email": str, "created_at": str} + for field, expected in required.items(): + if field not in data: + errors.append(f"missing field: {field}") + elif not isinstance(data[field], expected): + errors.append(f"{field}: want {expected.__name__}, got {type(data[field]).__name__}") + return errors + +resp = requests.get(f"{BASE}/users/1", headers=HEADERS, timeout=10) +issues = validate_user(resp.json()) +if issues: + print(f"contract violations: {issues}") +''') +``` + +Run after API upgrades, when integrating new third parties, or in CI smoke tests. + +## Correlation IDs + +Always capture the provider's request ID — fastest path to vendor support: + +```python +execute_code(''' +import requests +resp = requests.post(url, json=payload, headers=headers, timeout=10) +request_id = ( + resp.headers.get("X-Request-Id") + or resp.headers.get("X-Trace-Id") + or resp.headers.get("CF-Ray") # Cloudflare +) +if resp.status_code >= 400: + print(f"failed status={resp.status_code} req_id={request_id} ts={resp.headers.get('Date')}") +''') +``` + +**Vendor bug-report template:** + +``` +Endpoint: POST /api/v1/orders +Request ID: req_abc123xyz +Timestamp: 2026-03-17T14:30:00Z +Status: 500 +Expected: 201 with order object +Actual: 500 {"error":"internal server error"} +Repro: curl -X POST … (auth: ) +``` + +## Regression Test Template + +Drop this into `tests/` and run via `terminal('pytest tests/test_api_smoke.py -v')`: + +```python +import os, requests, pytest + +BASE_URL = os.environ.get("API_BASE_URL", "https://api.example.com") +TOKEN = os.environ.get("API_TOKEN", "") +HEADERS = {"Authorization": f"Bearer {TOKEN}"} + +class TestAPISmoke: + def test_health(self): + resp = requests.get(f"{BASE_URL}/health", timeout=5) + assert resp.status_code == 200 + + def 
test_list_users_returns_array(self): + resp = requests.get(f"{BASE_URL}/users", headers=HEADERS, timeout=10) + assert resp.status_code == 200 + data = resp.json() + assert isinstance(data.get("data", data), list) + + def test_get_user_required_fields(self): + resp = requests.get(f"{BASE_URL}/users/1", headers=HEADERS, timeout=10) + assert resp.status_code in (200, 404) + if resp.status_code == 200: + user = resp.json() + assert "id" in user and "email" in user + + def test_invalid_auth_returns_401(self): + resp = requests.get( + f"{BASE_URL}/users", + headers={"Authorization": "Bearer invalid-token"}, + timeout=10, + ) + assert resp.status_code == 401 +``` + +## Security + +### Token handling +- Never log full tokens. Redact: `Bearer <redacted>`. +- Never hardcode tokens in scripts. Read from env (`os.environ["API_TOKEN"]`) or `~/.hermes/.env`. +- Rotate immediately if a token surfaces in logs, error messages, or git history. + +### Safe logging + +```python +def redact_auth(headers: dict) -> dict: + sensitive = {"authorization", "x-api-key", "cookie", "set-cookie"} + return {k: ("<redacted>" if k.lower() in sensitive else v) for k, v in headers.items()} +``` + +### Leak checklist + +- [ ] **Credentials in URLs.** API keys in query strings end up in server logs, browser history, referrer headers — use headers. +- [ ] **PII in error responses.** `404 on /users/123` shouldn't reveal whether the user exists (enumeration). +- [ ] **Stack traces in prod.** 500s shouldn't leak file paths, framework versions. +- [ ] **Internal hostnames/IPs.** `10.x.x.x`, `internal-api.corp.local` in error bodies. +- [ ] **Tokens echoed back.** Some APIs include the auth token in error details. Verify they don't. +- [ ] **Verbose `Server` / `X-Powered-By`.** Stack-info leaks. Note for security review.
+ +## Hermes Tool Patterns + +### terminal — for curl, dig, openssl + +```python +terminal('curl -sI https://api.example.com') +terminal('openssl s_client -connect api.example.com:443 -servername api.example.com </dev/null 2>/dev/null | openssl x509 -noout -dates') +``` + +### execute_code — for multi-step Python flows + +When debugging spans auth → fetch → paginate → validate, use `execute_code`. Variables persist for the script, results print to stdout, no risk of token spam in your context: + +```python +execute_code(''' +import os, requests + +token = os.environ["API_TOKEN"] +base = "https://api.example.com" +H = {"Authorization": f"Bearer {token}"} + +# 1. auth +me = requests.get(f"{base}/me", headers=H, timeout=10) +print(f"auth {me.status_code}") + +# 2. paginate +all_users, cursor = [], None +while True: + params = {"cursor": cursor} if cursor else {} + r = requests.get(f"{base}/users", headers=H, params=params, timeout=10) + body = r.json() + all_users.extend(body["data"]) + cursor = body.get("next_cursor") + if not cursor: + break +print(f"users={len(all_users)}") +''') +``` + +### web_extract — for vendor API docs + +Pull the spec for the endpoint you're debugging instead of guessing: + +```python +web_extract(urls=["https://docs.example.com/api/v1/users"]) +``` + +### delegate_task — for full CRUD test sweeps + +```python +delegate_task( + goal="Test all CRUD endpoints for /api/v1/users", + context=""" +Follow the rest-graphql-debug skill (optional-skills/software-development/rest-graphql-debug). +Base URL: https://api.example.com +Auth: Bearer token from API_TOKEN env var. + +For each verb (POST, GET, PATCH, DELETE): + - happy path: assert status + response schema + - error cases: 400, 404, 422 + - log a repro curl for any failure (redact tokens) + +Output: pass/fail per endpoint + correlation IDs for failures.
+""", + toolsets=["terminal", "file"], +) +``` + +## Output Format + +When reporting findings: + +``` +## Finding +Endpoint: POST /api/v1/users +Status: 422 Unprocessable Entity +Req ID: req_abc123xyz + +## Repro +curl -X POST https://api.example.com/api/v1/users \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer ' \ + -d '{"name":"test"}' + +## Root Cause +Missing required field `email`. Server validation rejects before processing. + +## Fix +-d '{"name":"test","email":"test@example.com"}' +``` + +## Related + +- `systematic-debugging` — once the failing API layer is isolated, root-cause your code +- `test-driven-development` — write the regression test before shipping the fix diff --git a/website/scripts/generate-skill-docs.py b/website/scripts/generate-skill-docs.py index d55c6e55c31..2a0694a61c8 100755 --- a/website/scripts/generate-skill-docs.py +++ b/website/scripts/generate-skill-docs.py @@ -622,38 +622,70 @@ def build_sidebar_items(entries: list[tuple[dict[str, Any], dict[str, Any]]]) -> } -def write_sidebar(entries): - # The per-skill pages (`build_sidebar_items(entries)`) are still generated - # as standalone docs under `website/docs/user-guide/skills/{bundled,optional}/` - # and reachable via the catalog pages in Reference — but we intentionally - # do NOT explode them into the left sidebar. Two hundred-plus skill entries - # drown the actual product docs and make the site feel overwhelming to - # first-time visitors. - # - # Sidebar now shows: - # Skills - # ├── Bundled catalog → (link to reference/skills-catalog) - # └── Optional catalog → (link to reference/optional-skills-catalog) - # - # The catalog pages are auto-regenerated tables with a link to every skill. 
- # Individual skill pages (including the two formerly hand-written guides, - # godmode and google-workspace) are still reachable at their URLs and are - # linked from the catalog tables and from the Skills overview page — they - # just aren't promoted in the left sidebar, because there's no principled - # rule for which skills would get promoted and which wouldn't. - _ = build_sidebar_items(entries) # still called for any side effects / validation +def _render_sidebar_item(item: Any, indent: int) -> list[str]: + """Render one sidebar item (string doc id, or category dict) as ts lines.""" + pad = " " * indent + lines: list[str] = [] + if isinstance(item, str): + lines.append(f"{pad}'{item}',") + return lines + # category dict + lines.append(f"{pad}{{") + lines.append(f"{pad} type: 'category',") + lines.append(f"{pad} label: '{item['label']}',") + if item.get("collapsed", True): + lines.append(f"{pad} collapsed: true,") + lines.append(f"{pad} items: [") + for child in item.get("items", []): + lines.extend(_render_sidebar_item(child, indent + 4)) + lines.append(f"{pad} ],") + lines.append(f"{pad}}},") + return lines - skills_subtree = ( - " {\n" - " type: 'category',\n" - " label: 'Skills',\n" - " collapsed: true,\n" - " items: [\n" - " 'reference/skills-catalog',\n" - " 'reference/optional-skills-catalog',\n" - " ],\n" - " },\n" - ) + +def write_sidebar(entries): + # Sidebar layout: + # Skills + # ├── reference/skills-catalog + # ├── reference/optional-skills-catalog + # ├── Bundled + # │ ├── apple/ + # │ │ ├── apple-apple-notes + # │ │ └── ... + # │ └── ... + # └── Optional + # └── ... + # + # The two catalog index pages stay at the top of the Skills section so + # the at-a-glance table view is one click away, and the per-category + # subtrees give individual skill pages real sidebar navigation when + # users land on them directly. 
+ tree = build_sidebar_items(entries) + + skills_block: list[dict[str, Any]] = [ + { + "label": "Bundled", + "collapsed": True, + "items": tree["bundled_categories"], + }, + { + "label": "Optional", + "collapsed": True, + "items": tree["optional_categories"], + }, + ] + skills_items: list[Any] = [ + "reference/skills-catalog", + "reference/optional-skills-catalog", + *skills_block, + ] + + skills_top = { + "label": "Skills", + "collapsed": True, + "items": skills_items, + } + skills_subtree = "\n".join(_render_sidebar_item(skills_top, 8)) + "\n" sidebar_path = REPO / "website" / "sidebars.ts" text = sidebar_path.read_text(encoding="utf-8") diff --git a/website/sidebars.ts b/website/sidebars.ts index f0a0658c3bf..fe7b741eb2e 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -113,6 +113,444 @@ const sidebars: SidebarsConfig = { items: [ 'reference/skills-catalog', 'reference/optional-skills-catalog', + { + type: 'category', + label: 'Bundled', + collapsed: true, + items: [ + { + type: 'category', + label: 'apple', + collapsed: true, + items: [ + 'user-guide/skills/bundled/apple/apple-apple-notes', + 'user-guide/skills/bundled/apple/apple-apple-reminders', + 'user-guide/skills/bundled/apple/apple-findmy', + 'user-guide/skills/bundled/apple/apple-imessage', + 'user-guide/skills/bundled/apple/apple-macos-computer-use', + ], + }, + { + type: 'category', + label: 'autonomous-ai-agents', + collapsed: true, + items: [ + 'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code', + 'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex', + 'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent', + 'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode', + ], + }, + { + type: 'category', + label: 'creative', + collapsed: true, + items: [ + 'user-guide/skills/bundled/creative/creative-architecture-diagram', + 'user-guide/skills/bundled/creative/creative-ascii-art', + 
'user-guide/skills/bundled/creative/creative-ascii-video', + 'user-guide/skills/bundled/creative/creative-baoyu-comic', + 'user-guide/skills/bundled/creative/creative-baoyu-infographic', + 'user-guide/skills/bundled/creative/creative-claude-design', + 'user-guide/skills/bundled/creative/creative-comfyui', + 'user-guide/skills/bundled/creative/creative-creative-ideation', + 'user-guide/skills/bundled/creative/creative-design-md', + 'user-guide/skills/bundled/creative/creative-excalidraw', + 'user-guide/skills/bundled/creative/creative-humanizer', + 'user-guide/skills/bundled/creative/creative-manim-video', + 'user-guide/skills/bundled/creative/creative-p5js', + 'user-guide/skills/bundled/creative/creative-pixel-art', + 'user-guide/skills/bundled/creative/creative-popular-web-designs', + 'user-guide/skills/bundled/creative/creative-pretext', + 'user-guide/skills/bundled/creative/creative-sketch', + 'user-guide/skills/bundled/creative/creative-songwriting-and-ai-music', + 'user-guide/skills/bundled/creative/creative-touchdesigner-mcp', + ], + }, + { + type: 'category', + label: 'data-science', + collapsed: true, + items: [ + 'user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel', + ], + }, + { + type: 'category', + label: 'devops', + collapsed: true, + items: [ + 'user-guide/skills/bundled/devops/devops-kanban-orchestrator', + 'user-guide/skills/bundled/devops/devops-kanban-worker', + 'user-guide/skills/bundled/devops/devops-webhook-subscriptions', + ], + }, + { + type: 'category', + label: 'dogfood', + collapsed: true, + items: [ + 'user-guide/skills/bundled/dogfood/dogfood-dogfood', + ], + }, + { + type: 'category', + label: 'email', + collapsed: true, + items: [ + 'user-guide/skills/bundled/email/email-himalaya', + ], + }, + { + type: 'category', + label: 'gaming', + collapsed: true, + items: [ + 'user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server', + 'user-guide/skills/bundled/gaming/gaming-pokemon-player', + ], + }, + { + type: 
'category', + label: 'github', + collapsed: true, + items: [ + 'user-guide/skills/bundled/github/github-codebase-inspection', + 'user-guide/skills/bundled/github/github-github-auth', + 'user-guide/skills/bundled/github/github-github-code-review', + 'user-guide/skills/bundled/github/github-github-issues', + 'user-guide/skills/bundled/github/github-github-pr-workflow', + 'user-guide/skills/bundled/github/github-github-repo-management', + ], + }, + { + type: 'category', + label: 'mcp', + collapsed: true, + items: [ + 'user-guide/skills/bundled/mcp/mcp-native-mcp', + ], + }, + { + type: 'category', + label: 'media', + collapsed: true, + items: [ + 'user-guide/skills/bundled/media/media-gif-search', + 'user-guide/skills/bundled/media/media-heartmula', + 'user-guide/skills/bundled/media/media-songsee', + 'user-guide/skills/bundled/media/media-spotify', + 'user-guide/skills/bundled/media/media-youtube-content', + ], + }, + { + type: 'category', + label: 'mlops', + collapsed: true, + items: [ + 'user-guide/skills/bundled/mlops/mlops-models-audiocraft', + 'user-guide/skills/bundled/mlops/mlops-research-dspy', + 'user-guide/skills/bundled/mlops/mlops-huggingface-hub', + 'user-guide/skills/bundled/mlops/mlops-inference-llama-cpp', + 'user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness', + 'user-guide/skills/bundled/mlops/mlops-inference-obliteratus', + 'user-guide/skills/bundled/mlops/mlops-models-segment-anything', + 'user-guide/skills/bundled/mlops/mlops-inference-vllm', + 'user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases', + ], + }, + { + type: 'category', + label: 'note-taking', + collapsed: true, + items: [ + 'user-guide/skills/bundled/note-taking/note-taking-obsidian', + ], + }, + { + type: 'category', + label: 'productivity', + collapsed: true, + items: [ + 'user-guide/skills/bundled/productivity/productivity-airtable', + 'user-guide/skills/bundled/productivity/productivity-google-workspace', + 
'user-guide/skills/bundled/productivity/productivity-linear', + 'user-guide/skills/bundled/productivity/productivity-maps', + 'user-guide/skills/bundled/productivity/productivity-nano-pdf', + 'user-guide/skills/bundled/productivity/productivity-notion', + 'user-guide/skills/bundled/productivity/productivity-ocr-and-documents', + 'user-guide/skills/bundled/productivity/productivity-powerpoint', + 'user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline', + ], + }, + { + type: 'category', + label: 'red-teaming', + collapsed: true, + items: [ + 'user-guide/skills/bundled/red-teaming/red-teaming-godmode', + ], + }, + { + type: 'category', + label: 'research', + collapsed: true, + items: [ + 'user-guide/skills/bundled/research/research-arxiv', + 'user-guide/skills/bundled/research/research-blogwatcher', + 'user-guide/skills/bundled/research/research-llm-wiki', + 'user-guide/skills/bundled/research/research-polymarket', + 'user-guide/skills/bundled/research/research-research-paper-writing', + ], + }, + { + type: 'category', + label: 'smart-home', + collapsed: true, + items: [ + 'user-guide/skills/bundled/smart-home/smart-home-openhue', + ], + }, + { + type: 'category', + label: 'social-media', + collapsed: true, + items: [ + 'user-guide/skills/bundled/social-media/social-media-xurl', + ], + }, + { + type: 'category', + label: 'software-development', + collapsed: true, + items: [ + 'user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands', + 'user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring', + 'user-guide/skills/bundled/software-development/software-development-node-inspect-debugger', + 'user-guide/skills/bundled/software-development/software-development-plan', + 'user-guide/skills/bundled/software-development/software-development-python-debugpy', + 'user-guide/skills/bundled/software-development/software-development-requesting-code-review', + 
'user-guide/skills/bundled/software-development/software-development-spike', + 'user-guide/skills/bundled/software-development/software-development-subagent-driven-development', + 'user-guide/skills/bundled/software-development/software-development-systematic-debugging', + 'user-guide/skills/bundled/software-development/software-development-test-driven-development', + 'user-guide/skills/bundled/software-development/software-development-writing-plans', + ], + }, + { + type: 'category', + label: 'yuanbao', + collapsed: true, + items: [ + 'user-guide/skills/bundled/yuanbao/yuanbao-yuanbao', + ], + }, + ], + }, + { + type: 'category', + label: 'Optional', + collapsed: true, + items: [ + { + type: 'category', + label: 'autonomous-ai-agents', + collapsed: true, + items: [ + 'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox', + 'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho', + ], + }, + { + type: 'category', + label: 'blockchain', + collapsed: true, + items: [ + 'user-guide/skills/optional/blockchain/blockchain-evm', + 'user-guide/skills/optional/blockchain/blockchain-hyperliquid', + 'user-guide/skills/optional/blockchain/blockchain-solana', + ], + }, + { + type: 'category', + label: 'communication', + collapsed: true, + items: [ + 'user-guide/skills/optional/communication/communication-one-three-one-rule', + ], + }, + { + type: 'category', + label: 'creative', + collapsed: true, + items: [ + 'user-guide/skills/optional/creative/creative-blender-mcp', + 'user-guide/skills/optional/creative/creative-concept-diagrams', + 'user-guide/skills/optional/creative/creative-hyperframes', + 'user-guide/skills/optional/creative/creative-kanban-video-orchestrator', + 'user-guide/skills/optional/creative/creative-meme-generation', + ], + }, + { + type: 'category', + label: 'devops', + collapsed: true, + items: [ + 'user-guide/skills/optional/devops/devops-cli', + 'user-guide/skills/optional/devops/devops-docker-management', + 
'user-guide/skills/optional/devops/devops-watchers', + ], + }, + { + type: 'category', + label: 'dogfood', + collapsed: true, + items: [ + 'user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test', + ], + }, + { + type: 'category', + label: 'email', + collapsed: true, + items: [ + 'user-guide/skills/optional/email/email-agentmail', + ], + }, + { + type: 'category', + label: 'finance', + collapsed: true, + items: [ + 'user-guide/skills/optional/finance/finance-3-statement-model', + 'user-guide/skills/optional/finance/finance-comps-analysis', + 'user-guide/skills/optional/finance/finance-dcf-model', + 'user-guide/skills/optional/finance/finance-excel-author', + 'user-guide/skills/optional/finance/finance-lbo-model', + 'user-guide/skills/optional/finance/finance-merger-model', + 'user-guide/skills/optional/finance/finance-pptx-author', + 'user-guide/skills/optional/finance/finance-stocks', + ], + }, + { + type: 'category', + label: 'health', + collapsed: true, + items: [ + 'user-guide/skills/optional/health/health-fitness-nutrition', + 'user-guide/skills/optional/health/health-neuroskill-bci', + ], + }, + { + type: 'category', + label: 'mcp', + collapsed: true, + items: [ + 'user-guide/skills/optional/mcp/mcp-fastmcp', + 'user-guide/skills/optional/mcp/mcp-mcporter', + ], + }, + { + type: 'category', + label: 'migration', + collapsed: true, + items: [ + 'user-guide/skills/optional/migration/migration-openclaw-migration', + ], + }, + { + type: 'category', + label: 'mlops', + collapsed: true, + items: [ + 'user-guide/skills/optional/mlops/mlops-accelerate', + 'user-guide/skills/optional/mlops/mlops-training-axolotl', + 'user-guide/skills/optional/mlops/mlops-chroma', + 'user-guide/skills/optional/mlops/mlops-clip', + 'user-guide/skills/optional/mlops/mlops-faiss', + 'user-guide/skills/optional/mlops/mlops-flash-attention', + 'user-guide/skills/optional/mlops/mlops-guidance', + 'user-guide/skills/optional/mlops/mlops-huggingface-tokenizers', + 
'user-guide/skills/optional/mlops/mlops-instructor', + 'user-guide/skills/optional/mlops/mlops-lambda-labs', + 'user-guide/skills/optional/mlops/mlops-llava', + 'user-guide/skills/optional/mlops/mlops-modal', + 'user-guide/skills/optional/mlops/mlops-nemo-curator', + 'user-guide/skills/optional/mlops/mlops-inference-outlines', + 'user-guide/skills/optional/mlops/mlops-peft', + 'user-guide/skills/optional/mlops/mlops-pinecone', + 'user-guide/skills/optional/mlops/mlops-pytorch-fsdp', + 'user-guide/skills/optional/mlops/mlops-pytorch-lightning', + 'user-guide/skills/optional/mlops/mlops-qdrant', + 'user-guide/skills/optional/mlops/mlops-saelens', + 'user-guide/skills/optional/mlops/mlops-simpo', + 'user-guide/skills/optional/mlops/mlops-slime', + 'user-guide/skills/optional/mlops/mlops-stable-diffusion', + 'user-guide/skills/optional/mlops/mlops-tensorrt-llm', + 'user-guide/skills/optional/mlops/mlops-torchtitan', + 'user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning', + 'user-guide/skills/optional/mlops/mlops-training-unsloth', + 'user-guide/skills/optional/mlops/mlops-whisper', + ], + }, + { + type: 'category', + label: 'productivity', + collapsed: true, + items: [ + 'user-guide/skills/optional/productivity/productivity-canvas', + 'user-guide/skills/optional/productivity/productivity-here-now', + 'user-guide/skills/optional/productivity/productivity-memento-flashcards', + 'user-guide/skills/optional/productivity/productivity-shop-app', + 'user-guide/skills/optional/productivity/productivity-shopify', + 'user-guide/skills/optional/productivity/productivity-siyuan', + 'user-guide/skills/optional/productivity/productivity-telephony', + ], + }, + { + type: 'category', + label: 'research', + collapsed: true, + items: [ + 'user-guide/skills/optional/research/research-bioinformatics', + 'user-guide/skills/optional/research/research-domain-intel', + 'user-guide/skills/optional/research/research-drug-discovery', + 
'user-guide/skills/optional/research/research-duckduckgo-search', + 'user-guide/skills/optional/research/research-gitnexus-explorer', + 'user-guide/skills/optional/research/research-parallel-cli', + 'user-guide/skills/optional/research/research-qmd', + 'user-guide/skills/optional/research/research-scrapling', + 'user-guide/skills/optional/research/research-searxng-search', + ], + }, + { + type: 'category', + label: 'security', + collapsed: true, + items: [ + 'user-guide/skills/optional/security/security-1password', + 'user-guide/skills/optional/security/security-oss-forensics', + 'user-guide/skills/optional/security/security-sherlock', + ], + }, + { + type: 'category', + label: 'software-development', + collapsed: true, + items: [ + 'user-guide/skills/optional/software-development/software-development-rest-graphql-debug', + ], + }, + { + type: 'category', + label: 'web-development', + collapsed: true, + items: [ + 'user-guide/skills/optional/web-development/web-development-page-agent', + ], + }, + ], + }, ], }, ], From ce0e189d3e7185d6c8c6af924a1df23e17c6f85c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 17:11:06 -0700 Subject: [PATCH 093/218] fix(xai-oauth): break entitlement-403 credential-refresh loop, bump grok-4.3 context to 1M (#26664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don Piedro's 18-minute hang on grok-4.3 traced to two issues PR #26644 didn't cover: - _recover_with_credential_pool classifies 403 as FailoverReason.auth and calls pool.try_refresh_current(). For xAI OAuth on an unsubscribed account, refresh succeeds (mints a new token from the same account) but the next API call 403s with the same entitlement error. Result: infinite refresh → retry → 403 loop until Ctrl+C (1133s in Don's log). 
New _is_entitlement_failure(error_context, status_code) detects the subscription-shape body ("do not have an active Grok subscription" / "out of available resources" + grok / "does not have permission" + grok) and short-circuits recovery so _summarize_api_error surfaces PR #26644's friendly hint. - grok-4.3 resolved to 256k via the grok-4 catch-all in DEFAULT_CONTEXT_LENGTHS. Per docs.x.ai/developers/models/grok-4.3 the model ships with 1M context. Add explicit grok-4.3 entry before the grok-4 fallback (longest-first substring matching ensures grok-4.3 and grok-4.3-latest both land on the new value). Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py). E2E verified Don's 100-iteration loop bails out with 0 refresh calls while genuine auth failures still refresh once and recover. --- agent/model_metadata.py | 1 + run_agent.py | 56 ++++++ .../test_codex_xai_oauth_recovery.py | 190 ++++++++++++++++++ 3 files changed, 247 insertions(+) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a10a01e3cc2..41e229416c9 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = { "grok-2-vision": 8192, # grok-2-vision, -1212, -latest "grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning "grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309 + "grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai "grok-4": 256000, # grok-4, grok-4-0709 "grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast "grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest diff --git a/run_agent.py b/run_agent.py index 2b20d48ede2..da47ca84e34 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4966,6 +4966,44 @@ class AIAgent: trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) + @staticmethod + def _is_entitlement_failure( + error_context: Optional[Dict[str, Any]], + status_code: 
Optional[int], + ) -> bool: + """Detect subscription/entitlement 403s that masquerade as auth failures. + + Returned True only when the body text matches a known entitlement + shape AND the status is 401/403. Refreshing an OAuth token cannot + fix an unsubscribed account, so callers should surface the error + instead of looping the credential pool. + + Current matches: + * xAI OAuth: "do not have an active Grok subscription" / + "out of available resources" / "does not have permission" + "grok" + + Extend here for new providers as we discover them (Anthropic's + Claude Max OAuth entitlement errors look distinct enough today that + the existing 1M-context-beta branch handles them; revisit if other + subscription tiers start producing the same loop signature). + """ + if status_code not in (401, 403, None): + return False + if not isinstance(error_context, dict): + return False + message = str(error_context.get("message") or "").lower() + reason = str(error_context.get("reason") or "").lower() + haystack = f"{message} {reason}" + if not haystack.strip(): + return False + if "do not have an active grok subscription" in haystack: + return True + if "out of available resources" in haystack and "grok" in haystack: + return True + if "does not have permission" in haystack and "grok" in haystack: + return True + return False + @staticmethod def _decorate_xai_entitlement_error(detail: str) -> str: """Append a friendly hint when xAI's OAuth surface returns an @@ -7551,6 +7589,24 @@ class AIAgent: return False, True if effective_reason == FailoverReason.auth: + # Subscription/entitlement 403s look like auth failures on the + # wire but refresh cannot fix them — the OAuth token is + # already valid; the account simply lacks the entitlement + # (e.g. xAI OAuth without SuperGrok/X Premium for grok-4.3). 
+ # Without this guard, ``try_refresh_current()`` keeps minting + # fresh tokens against the same unsubscribed account and the + # main agent loop spins re-issuing the same 403 until the + # user Ctrl+C's. Surface the error instead so the friendly + # entitlement hint from ``_summarize_api_error`` can land. + if self._is_entitlement_failure(error_context, status_code): + logger.info( + "Credential %s — entitlement-shaped 403 from %s; " + "skipping pool refresh (account lacks subscription, " + "not a transient auth failure).", + status_code if status_code is not None else "auth", + self.provider or "provider", + ) + return False, has_retried_429 refreshed = pool.try_refresh_current() if refreshed is not None: logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}") diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py index 0f3603d2ca7..7c675f22225 100644 --- a/tests/run_agent/test_codex_xai_oauth_recovery.py +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input(): assert reasoning_items[0]["encrypted_content"] == "enc_blob" # Native Codex still asks for encrypted_content back. assert "reasoning.encrypted_content" in kwargs.get("include", []) + + +# --------------------------------------------------------------------------- +# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "message", + [ + # The exact wire text RaidenTyler and Don Piedro captured. + "You have either run out of available resources or do not have an " + "active Grok subscription. Manage at https://grok.com", + # Permission-style variant from the same 403 body. 
+ "The caller does not have permission to execute the specified " + "operation for grok-4.3", + ], +) +def test_is_entitlement_failure_matches_real_xai_bodies(message): + from run_agent import AIAgent + + assert AIAgent._is_entitlement_failure( + {"message": message, "reason": "permission_denied"}, + 403, + ) + + +def test_is_entitlement_failure_false_for_status_other_than_401_403(): + """200/429/500 must never be classified as entitlement, even if body matches.""" + from run_agent import AIAgent + + body = { + "message": "do not have an active Grok subscription", + } + assert not AIAgent._is_entitlement_failure(body, 500) + assert not AIAgent._is_entitlement_failure(body, 429) + assert not AIAgent._is_entitlement_failure(body, 200) + + +def test_is_entitlement_failure_false_for_unrelated_auth_errors(): + """A real auth failure (expired token, wrong key) must keep refreshing.""" + from run_agent import AIAgent + + # Generic Anthropic-style auth failure + assert not AIAgent._is_entitlement_failure( + {"message": "Invalid API key", "reason": "authentication_error"}, + 401, + ) + # OAuth token expired + assert not AIAgent._is_entitlement_failure( + {"message": "Token has expired", "reason": "unauthorized"}, + 401, + ) + # Empty context + assert not AIAgent._is_entitlement_failure({}, 401) + assert not AIAgent._is_entitlement_failure(None, 401) + + +def test_recover_with_credential_pool_skips_refresh_on_entitlement_403(): + """The recovery path must NOT call pool.try_refresh_current() on entitlement 403. + + Before the fix, an unsubscribed xAI OAuth account would burn the agent + loop indefinitely: refresh → 403 → refresh → 403, infinitely. With + the entitlement guard, recovery returns False so the error surfaces + normally with the friendly hint from _summarize_api_error. + """ + from run_agent import AIAgent + from agent.error_classifier import FailoverReason + + agent = _make_codex_agent() + + # Wire a fake credential pool that records refresh attempts. 
+ refresh_calls = {"n": 0} + + class _FakePool: + def try_refresh_current(self): + refresh_calls["n"] += 1 + return MagicMock(id="should_not_be_called") + + def mark_exhausted_and_rotate(self, **_kwargs): + return None + + def has_available(self): + return False + + agent._credential_pool = _FakePool() + + error_context = { + "reason": "The caller does not have permission to execute the specified operation", + "message": "You have either run out of available resources or do not have an " + "active Grok subscription. Manage at https://grok.com", + } + + recovered, _retried_429 = agent._recover_with_credential_pool( + status_code=403, + has_retried_429=False, + classified_reason=FailoverReason.auth, + error_context=error_context, + ) + + assert recovered is False, "Entitlement 403 must surface, not silently recover" + assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on entitlement 403" + + +def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure(): + """Regression guard: legitimate auth errors must still trigger refresh.""" + from run_agent import AIAgent + from agent.error_classifier import FailoverReason + + agent = _make_codex_agent() + + refresh_calls = {"n": 0} + + class _FakePool: + def try_refresh_current(self): + refresh_calls["n"] += 1 + # Return a fake refreshed entry — semantically "refresh worked" + entry = MagicMock() + entry.id = "entry_refreshed" + return entry + + def mark_exhausted_and_rotate(self, **_kwargs): + return None + + def has_available(self): + return False + + agent._credential_pool = _FakePool() + # _swap_credential is called by the recovery path — stub it out + agent._swap_credential = MagicMock() + + error_context = { + "reason": "authentication_error", + "message": "Invalid API key", + } + + recovered, _retried_429 = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + classified_reason=FailoverReason.auth, + error_context=error_context, + ) + + assert recovered is 
True, "Genuine auth failure must still recover via refresh" + assert refresh_calls["n"] == 1 + + +# --------------------------------------------------------------------------- +# Fix E: grok-4.3 context length must be 1M, not 256K +# --------------------------------------------------------------------------- + + +def test_grok_4_3_context_length_is_1m(): + """grok-4.3 ships with 1M context per docs.x.ai/developers/models/grok-4.3. + + Hermes' substring-match fallback used to return 256k (from the + "grok-4" catch-all) which under-reported the model's real capacity. + """ + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + + # The entry exists with the expected value. + assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000 + + # And longest-first substring matching resolves grok-4.3 and + # grok-4.3-latest to the new value, NOT the grok-4 catch-all. + for slug in ("grok-4.3", "grok-4.3-latest"): + matched_key = max( + (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), + key=len, + ) + assert matched_key == "grok-4.3", ( + f"Expected longest-first match to land on grok-4.3 for {slug}, " + f"got {matched_key}" + ) + assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000 + + +def test_grok_4_still_resolves_to_256k(): + """Regression guard: grok-4 (non-.3) must still resolve to 256k.""" + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + + for slug in ("grok-4", "grok-4-0709"): + matched_key = max( + (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), + key=len, + ) + # grok-4-0709 contains "grok-4" but not "grok-4.3"; matched key + # must be "grok-4" (or a more specific variant family if one is + # ever added). The 256k contract must hold. 
+ assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000 From 9818b9a1acb915971d835d1faa85949e9f7a87a5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 17:15:22 -0700 Subject: [PATCH 094/218] fix(xai-oauth): rewrite entitlement-403 hint to not accuse subscribers (#26666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #26644 confidently told users "xAI OAuth account lacks SuperGrok / X Premium entitlement" on any 403 from xAI's permission-denied surface. But that body is returned for at least four distinct causes that Hermes cannot distinguish from the wire: * Account has no Grok subscription at all * Account has SuperGrok but the tier doesn't include the requested model (e.g. grok-4.3 needs SuperGrok Heavy) * Monthly quota for the subscribed tier is exhausted * SuperGrok is active but the API access add-on isn't enabled Don Piedro pushed back that he IS subscribed yet still hit this. Picking the worst-case interpretation ("you're not subscribed") reads as wrong and insulting to subscribers, and points them at a fix they already did. New wording lists all 4 possibilities and points at https://grok.com/?_s=usage where the user can check which applies. The detection logic and credential-pool short-circuit (PR #26664) are unchanged — only the user-facing wording is rephrased. --- run_agent.py | 42 ++++++++++++------ .../test_codex_xai_oauth_recovery.py | 44 +++++++++++++++++-- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/run_agent.py b/run_agent.py index da47ca84e34..da05e7e8239 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5006,23 +5006,35 @@ class AIAgent: @staticmethod def _decorate_xai_entitlement_error(detail: str) -> str: - """Append a friendly hint when xAI's OAuth surface returns an - entitlement-shaped error. + """Append a neutral hint when xAI's OAuth surface returns the + permission-denied 403. 
- xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a - SuperGrok / X Premium subscription with HTTP 403 carrying a body like:: + xAI's ``/v1/responses`` endpoint replies to several distinct failure + modes with the SAME body:: {"code": "The caller does not have permission to execute the specified operation", "error": "You have either run out of available resources or do not have an active Grok subscription. - Manage subscriptions at https://grok.com/..."} + Manage subscriptions at https://grok.com/?_s=usage or subscribe + at https://grok.com/supergrok"} - The raw text is useful but the action the user needs to take (subscribe - on grok.com, or switch providers with ``/model``) isn't obvious from - the wire format. Detect the entitlement shape and append a hint. + That body covers at least four real causes we cannot distinguish + without more info from xAI: - Matched once per detail string — won't double-decorate if the upstream - already concatenated the same text. + * Account has no Grok subscription at all + * Account has SuperGrok but the tier doesn't include the requested + model (e.g. grok-4.3 needs SuperGrok Heavy) + * Monthly quota for the subscribed tier is exhausted (the + ``?_s=usage`` URL hints at this) + * SuperGrok is active but the API access add-on isn't enabled + + Picking one ("you're not subscribed") is wrong for the other three + and reads as insulting to subscribers. Surface the raw xAI text + verbatim and point at https://grok.com/?_s=usage where the user + can see WHICH of those four it is. + + Matched once per detail string — won't double-decorate if the + upstream already concatenated the same text. """ if not detail: return detail @@ -5035,11 +5047,15 @@ class AIAgent: if not is_entitlement: return detail hint = ( - " — xAI OAuth account lacks SuperGrok / X Premium entitlement for " - "this model. Subscribe at https://grok.com or run `/model` to " + " — xAI rejected the request on this OAuth account. 
Could be a " + "missing subscription, a tier that doesn't include this model, an " + "exhausted quota, or API access not enabled. Check " + "https://grok.com/?_s=usage to see which, or run `/model` to " "switch providers." ) - if hint.strip() in detail: + # Idempotency: detect prior decoration by a substring unique to the + # hint (not present in xAI's own body text). + if "Could be a missing subscription" in detail: return detail return f"{detail}{hint}" diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py index 7c675f22225..c64f46eea09 100644 --- a/tests/run_agent/test_codex_xai_oauth_recovery.py +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -163,7 +163,12 @@ def test_codex_stream_postlude_error_still_falls_back(): def test_summarize_api_error_decorates_xai_entitlement_403(): - """xAI's OAuth 403 must end with the subscribe-or-switch hint.""" + """xAI's OAuth 403 must end with the neutral 4-cause hint. + + Wording is deliberately ambiguous because xAI returns the SAME body for: + no subscription, wrong tier, exhausted quota, or API access not enabled. + Picking one (e.g. "you're not subscribed") would insult subscribers. + """ from run_agent import AIAgent error = RuntimeError( @@ -173,10 +178,39 @@ def test_summarize_api_error_decorates_xai_entitlement_403(): "subscriptions at https://grok.com'}" ) summary = AIAgent._summarize_api_error(error) + # The original xAI text must survive — it's still useful diagnostic info. assert "do not have an active Grok subscription" in summary - assert "SuperGrok" in summary + # The hint must NOT confidently assert "lacks subscription"; it must + # acknowledge the 4 possible causes. + assert "Could be a missing subscription" in summary + assert "tier that doesn't include this model" in summary + assert "exhausted quota" in summary + assert "API access not enabled" in summary + # The hint must point at the usage page where the user can verify which. 
+ assert "https://grok.com/?_s=usage" in summary + # Switching providers is still a valid escape hatch. assert "/model" in summary - assert "https://grok.com" in summary + + +def test_summarize_api_error_does_not_accuse_subscribers(): + """Hint must not confidently say the user has no subscription. + + Don Piedro reported his subscription is active. The hint must not + contradict him — it must list all 4 possible causes and let him + check which one applies. + """ + from run_agent import AIAgent + + error = RuntimeError( + "HTTP 403: do not have an active Grok subscription" + ) + summary = AIAgent._summarize_api_error(error) + # MUST NOT contain language that assumes the user is unsubscribed. + assert "lacks SuperGrok" not in summary + assert "lacks subscription" not in summary + assert "your account doesn't have" not in summary.lower() + # MUST contain the neutral framing. + assert "Could be" in summary or "could be" in summary def test_summarize_api_error_decorates_xai_body_message(): @@ -197,7 +231,7 @@ def test_summarize_api_error_decorates_xai_body_message(): summary = AIAgent._summarize_api_error(_XaiErr("403")) assert "HTTP 403" in summary - assert "SuperGrok / X Premium" in summary + assert "Could be a missing subscription" in summary def test_summarize_api_error_idempotent_for_entitlement_hint(): @@ -208,6 +242,8 @@ def test_summarize_api_error_idempotent_for_entitlement_hint(): once = AIAgent._decorate_xai_entitlement_error(raw) twice = AIAgent._decorate_xai_entitlement_error(once) assert once == twice + # Sanity: the hint did fire on the first pass. 
+ assert "Could be a missing subscription" in once def test_summarize_api_error_passes_through_unrelated_errors(): From 6784c80794bfd3cc40aae7f7d9f1a59876de7799 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 17:23:33 -0700 Subject: [PATCH 095/218] fix(xai-oauth): lead entitlement-403 hint with X Premium+ gotcha (#26672) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #1 confusing cause of the xAI 403 (per Teknium): X Premium+ subscribers see Grok inside the X app and assume API access is included. It is NOT — only standalone SuperGrok subscribers can use xai-oauth with Hermes today. Without calling this out, every Premium+ user hits the 403 with no idea why. PR #26666's neutral 4-cause list was correct but buried the most common cause. Lead with the Premium+ gotcha, then list the other possibilities (no subscription, wrong tier, exhausted quota) as fallbacks. Same neutral framing — does not accuse anyone of being unsubscribed. --- run_agent.py | 38 ++++++++-------- .../test_codex_xai_oauth_recovery.py | 44 ++++++++++--------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/run_agent.py b/run_agent.py index da05e7e8239..85c1128d68e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5018,20 +5018,21 @@ class AIAgent: Manage subscriptions at https://grok.com/?_s=usage or subscribe at https://grok.com/supergrok"} - That body covers at least four real causes we cannot distinguish - without more info from xAI: + That body covers several real causes we cannot distinguish without + more info from xAI. The most common (and least obvious) one is + that **X Premium+ does NOT include API access** — only standalone + SuperGrok subscribers can use Hermes against xai-oauth. Lots of + users see Grok in their X app, assume it works here too, and hit + this 403 with no idea why. Lead the hint with that. 
- * Account has no Grok subscription at all - * Account has SuperGrok but the tier doesn't include the requested - model (e.g. grok-4.3 needs SuperGrok Heavy) - * Monthly quota for the subscribed tier is exhausted (the - ``?_s=usage`` URL hints at this) - * SuperGrok is active but the API access add-on isn't enabled + Other possible causes: + * No Grok subscription at all + * SuperGrok tier doesn't include the requested model (e.g. + grok-4.3 may need a higher tier) + * Monthly quota exhausted (the ``?_s=usage`` URL hints at this) - Picking one ("you're not subscribed") is wrong for the other three - and reads as insulting to subscribers. Surface the raw xAI text - verbatim and point at https://grok.com/?_s=usage where the user - can see WHICH of those four it is. + Surface the raw xAI text verbatim and point at + https://grok.com/?_s=usage where the user can see WHICH applies. Matched once per detail string — won't double-decorate if the upstream already concatenated the same text. @@ -5047,15 +5048,16 @@ class AIAgent: if not is_entitlement: return detail hint = ( - " — xAI rejected the request on this OAuth account. Could be a " - "missing subscription, a tier that doesn't include this model, an " - "exhausted quota, or API access not enabled. Check " - "https://grok.com/?_s=usage to see which, or run `/model` to " - "switch providers." + " — xAI rejected this OAuth account. NOTE: X Premium+ does NOT " + "include xAI API access — only standalone SuperGrok subscribers " + "can use this provider. Other possible causes: no Grok " + "subscription, your tier doesn't include this model, or your " + "quota is exhausted. Check https://grok.com/?_s=usage to see " + "which, or run `/model` to switch providers." ) # Idempotency: detect prior decoration by a substring unique to the # hint (not present in xAI's own body text). 
- if "Could be a missing subscription" in detail: + if "X Premium+ does NOT include" in detail: return detail return f"{detail}{hint}" diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py index c64f46eea09..9192d50695b 100644 --- a/tests/run_agent/test_codex_xai_oauth_recovery.py +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -163,11 +163,13 @@ def test_codex_stream_postlude_error_still_falls_back(): def test_summarize_api_error_decorates_xai_entitlement_403(): - """xAI's OAuth 403 must end with the neutral 4-cause hint. + """xAI's OAuth 403 must surface the X Premium+ gotcha + neutral causes. - Wording is deliberately ambiguous because xAI returns the SAME body for: - no subscription, wrong tier, exhausted quota, or API access not enabled. - Picking one (e.g. "you're not subscribed") would insult subscribers. + Wording deliberately leads with the X Premium+ gotcha because that's + the #1 confusing case: people see Grok in their X app, assume it + works here too, and hit this 403 with no idea API access is a + separate SKU. Other causes (no subscription, wrong tier, exhausted + quota) follow. """ from run_agent import AIAgent @@ -180,13 +182,15 @@ def test_summarize_api_error_decorates_xai_entitlement_403(): summary = AIAgent._summarize_api_error(error) # The original xAI text must survive — it's still useful diagnostic info. assert "do not have an active Grok subscription" in summary - # The hint must NOT confidently assert "lacks subscription"; it must - # acknowledge the 4 possible causes. - assert "Could be a missing subscription" in summary - assert "tier that doesn't include this model" in summary - assert "exhausted quota" in summary - assert "API access not enabled" in summary - # The hint must point at the usage page where the user can verify which. + # The hint MUST lead with the X Premium+ gotcha (most likely cause + # for users who think they're subscribed). 
+ assert "X Premium+ does NOT include" in summary + assert "standalone SuperGrok subscribers" in summary + # Other causes still listed. + assert "no Grok subscription" in summary + assert "tier doesn't include this model" in summary + assert "quota is exhausted" in summary + # The hint must point at the usage page where the user can verify. assert "https://grok.com/?_s=usage" in summary # Switching providers is still a valid escape hatch. assert "/model" in summary @@ -196,8 +200,9 @@ def test_summarize_api_error_does_not_accuse_subscribers(): """Hint must not confidently say the user has no subscription. Don Piedro reported his subscription is active. The hint must not - contradict him — it must list all 4 possible causes and let him - check which one applies. + contradict him — leading with the X Premium+ gotcha gives subscribers + a plausible reason ("oh, I'm on Premium+ not pure SuperGrok") instead + of accusing them of lying about having a subscription. """ from run_agent import AIAgent @@ -205,12 +210,11 @@ def test_summarize_api_error_does_not_accuse_subscribers(): "HTTP 403: do not have an active Grok subscription" ) summary = AIAgent._summarize_api_error(error) - # MUST NOT contain language that assumes the user is unsubscribed. + # MUST NOT contain language that flatly assumes the user is unsubscribed. assert "lacks SuperGrok" not in summary - assert "lacks subscription" not in summary - assert "your account doesn't have" not in summary.lower() - # MUST contain the neutral framing. - assert "Could be" in summary or "could be" in summary + assert "you are not subscribed" not in summary.lower() + # MUST lead with the most-likely-but-non-accusatory cause. 
+ assert "X Premium+ does NOT include" in summary def test_summarize_api_error_decorates_xai_body_message(): @@ -231,7 +235,7 @@ def test_summarize_api_error_decorates_xai_body_message(): summary = AIAgent._summarize_api_error(_XaiErr("403")) assert "HTTP 403" in summary - assert "Could be a missing subscription" in summary + assert "X Premium+ does NOT include" in summary def test_summarize_api_error_idempotent_for_entitlement_hint(): @@ -243,7 +247,7 @@ def test_summarize_api_error_idempotent_for_entitlement_hint(): twice = AIAgent._decorate_xai_entitlement_error(once) assert once == twice # Sanity: the hint did fire on the first pass. - assert "Could be a missing subscription" in once + assert "X Premium+ does NOT include" in once def test_summarize_api_error_passes_through_unrelated_errors(): From 566d8f0d75049e5e4e4e3e3fde7f8c766ae235d6 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Fri, 15 May 2026 20:08:24 -0500 Subject: [PATCH 096/218] fix(tui): keep DECSTBM scroll region off bottom row (#26683) Avoid shifting the terminal's last visible row in the alt-screen DECSTBM fast path, which can leave transient scroll bleed/discoloration artifacts around the status lane until a repaint. Add regression tests to preserve the fast path when safe and skip it when the hint touches the bottom row. 
--- .../hermes-ink/src/ink/log-update.test.ts | 42 +++++++++++++++++++ .../packages/hermes-ink/src/ink/log-update.ts | 5 ++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts index 35c99f7e0a2..a11a028e771 100644 --- a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts @@ -42,6 +42,8 @@ const stdoutOnly = (diff: ReturnType) => .map(p => (p as { type: 'stdout'; content: string }).content) .join('') +const hasDecstbm = (text: string) => /\x1b\[\d+;\d+r/.test(text) + describe('LogUpdate.render diff contract', () => { it('emits only changed cells when most rows match', () => { const w = 20 @@ -154,4 +156,44 @@ describe('LogUpdate.render diff contract', () => { expect(diff.some(p => p.type === 'clearTerminal')).toBe(true) expect(stdoutOnly(diff)).toContain('timer2s') }) + + it('keeps DECSTBM fast-path when scroll region stays above bottom row', () => { + const w = 12 + const h = 6 + const prev = mkScreen(w, h) + const next = mkScreen(w, h) + + paint(prev, 1, 'row one') + paint(next, 1, 'row one') + + const prevFrame = mkFrame(prev, w, h) + const nextFrame: Frame = { + ...mkFrame(next, w, h), + scrollHint: { top: 1, bottom: 4, delta: 1 } + } + const log = new LogUpdate({ isTTY: true, stylePool }) + const diff = log.render(prevFrame, nextFrame, true, true) + + expect(hasDecstbm(stdoutOnly(diff))).toBe(true) + }) + + it('skips DECSTBM when scroll region touches the bottom row', () => { + const w = 12 + const h = 6 + const prev = mkScreen(w, h) + const next = mkScreen(w, h) + + paint(prev, 1, 'row one') + paint(next, 1, 'row one') + + const prevFrame = mkFrame(prev, w, h) + const nextFrame: Frame = { + ...mkFrame(next, w, h), + scrollHint: { top: 1, bottom: 5, delta: 1 } + } + const log = new LogUpdate({ isTTY: true, stylePool }) + const diff = log.render(prevFrame, nextFrame, true, true) + + 
expect(hasDecstbm(stdoutOnly(diff))).toBe(false) + }) }) diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.ts index 9a377c2c6f6..0f36d4641e7 100644 --- a/ui-tui/packages/hermes-ink/src/ink/log-update.ts +++ b/ui-tui/packages/hermes-ink/src/ink/log-update.ts @@ -175,7 +175,10 @@ export class LogUpdate { if (altScreen && next.scrollHint && decstbmSafe) { const { top, bottom, delta } = next.scrollHint - if (top >= 0 && bottom < prev.screen.height && bottom < next.screen.height) { + // Keep DECSTBM away from the terminal's last visible row. In alt-screen + // layouts we reserve that lane for status/cursor parking, and scrolling + // it can leave transient ghosting/bleed artifacts until a later repaint. + if (top >= 0 && bottom < prev.screen.height - 1 && bottom < next.screen.height - 1) { shiftRows(prev.screen, top, bottom, delta) scrollPatch = [ { From 006937f7d062f7f1dd830aa16476ce962bd30445 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Fri, 15 May 2026 20:19:02 -0500 Subject: [PATCH 097/218] fix(tui): handle timeout/error subagent statuses in /agents (#26687) Accept delegation timeout/error statuses in the TUI subagent model, normalize unknown status strings defensively, and harden /agents overlay rendering/sorting so unknown statuses cannot crash glyph/color lookup. Add regression tests for live event normalization and disk snapshot replay. 
--- .../createGatewayEventHandler.test.ts | 55 +++++++++++++++++++ .../src/__tests__/spawnHistoryStore.test.ts | 46 ++++++++++++++++ ui-tui/src/app/createGatewayEventHandler.ts | 29 ++++++++-- ui-tui/src/app/spawnHistoryStore.ts | 24 +++++++- ui-tui/src/components/agentsOverlay.tsx | 19 +++++-- ui-tui/src/components/thinking.tsx | 6 +- ui-tui/src/gatewayTypes.ts | 4 +- ui-tui/src/types.ts | 4 +- 8 files changed, 173 insertions(+), 14 deletions(-) create mode 100644 ui-tui/src/__tests__/spawnHistoryStore.test.ts diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index d74976d195e..cd278eecdf9 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -737,6 +737,61 @@ describe('createGatewayEventHandler', () => { expect(getTurnState().activity).toMatchObject([{ text: 'boom', tone: 'error' }]) }) + it('accepts timeout/error subagent terminal statuses and ignores stale live events', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + + onEvent({ + payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 }, + type: 'subagent.start' + } as any) + onEvent({ + payload: { goal: 'timeout child', status: 'timeout', subagent_id: 'sa-timeout', task_index: 0 }, + type: 'subagent.complete' + } as any) + + expect(getTurnState().subagents.find(s => s.id === 'sa-timeout')?.status).toBe('timeout') + + // Late start/spawn updates must not clobber terminal timeout/error states. 
+ onEvent({ + payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 }, + type: 'subagent.start' + } as any) + onEvent({ + payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 }, + type: 'subagent.spawn_requested' + } as any) + + expect(getTurnState().subagents.find(s => s.id === 'sa-timeout')?.status).toBe('timeout') + + onEvent({ + payload: { goal: 'error child', subagent_id: 'sa-error', task_index: 1 }, + type: 'subagent.start' + } as any) + onEvent({ + payload: { goal: 'error child', status: 'error', subagent_id: 'sa-error', task_index: 1 }, + type: 'subagent.complete' + } as any) + + expect(getTurnState().subagents.find(s => s.id === 'sa-error')?.status).toBe('error') + }) + + it('normalizes unknown subagent.complete statuses to completed', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + + onEvent({ + payload: { goal: 'weird child', subagent_id: 'sa-weird', task_index: 2 }, + type: 'subagent.start' + } as any) + onEvent({ + payload: { goal: 'weird child', status: 'mystery_status', subagent_id: 'sa-weird', task_index: 2 }, + type: 'subagent.complete' + } as any) + + expect(getTurnState().subagents.find(s => s.id === 'sa-weird')?.status).toBe('completed') + }) + it('drops stale reasoning/tool/todos events after ctrl-c until the next message starts', () => { // Repro for the discord report: ctrl-c interrupts, but late reasoning/tool // events from the still-winding-down agent loop kept populating the UI for diff --git a/ui-tui/src/__tests__/spawnHistoryStore.test.ts b/ui-tui/src/__tests__/spawnHistoryStore.test.ts new file mode 100644 index 00000000000..544280e5c42 --- /dev/null +++ b/ui-tui/src/__tests__/spawnHistoryStore.test.ts @@ -0,0 +1,46 @@ +import { beforeEach, describe, expect, it } from 'vitest' + +import { clearSpawnHistory, getSpawnHistory, pushDiskSnapshot } from '../app/spawnHistoryStore.js' + +describe('spawnHistoryStore status normalization', () => { + 
beforeEach(() => { + clearSpawnHistory() + }) + + it('keeps timeout/error statuses from disk snapshots', () => { + pushDiskSnapshot( + { + finished_at: 1_700_000_001, + label: 'status test', + session_id: 'sess-1', + started_at: 1_700_000_000, + subagents: [ + { goal: 'timeout child', id: 'sa-timeout', index: 0, status: 'timeout' }, + { goal: 'error child', id: 'sa-error', index: 1, status: 'error' } + ] + }, + '/tmp/snap-timeout-error.json' + ) + + const statuses = getSpawnHistory()[0]?.subagents.map(s => s.status) + + expect(statuses).toEqual(['timeout', 'error']) + }) + + it('falls back unknown disk statuses to completed', () => { + pushDiskSnapshot( + { + finished_at: 1_700_000_011, + label: 'unknown status test', + session_id: 'sess-2', + started_at: 1_700_000_010, + subagents: [{ goal: 'mystery child', id: 'sa-unknown', index: 0, status: 'mystery_status' }] + }, + '/tmp/snap-unknown.json' + ) + + const status = getSpawnHistory()[0]?.subagents[0]?.status + + expect(status).toBe('completed') + }) +}) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 555a35e8afe..ca269a131b4 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -13,7 +13,7 @@ import { rpcErrorMessage } from '../lib/rpc.js' import { topLevelSubagents } from '../lib/subagentTree.js' import { formatToolCall, stripAnsi } from '../lib/text.js' import { fromSkin } from '../theme.js' -import type { Msg, SubagentProgress } from '../types.js' +import type { Msg, SubagentProgress, SubagentStatus } from '../types.js' import { applyDelegationStatus, getDelegationState } from './delegationStore.js' import type { GatewayEventHandlerContext } from './interfaces.js' @@ -54,6 +54,26 @@ const pushThinking = pushUnique(6) const pushNote = pushUnique(6) const pushTool = pushUnique(8) +const KNOWN_SUBAGENT_STATUSES = new Set([ + 'completed', + 'error', + 'failed', + 'interrupted', + 'queued', + 
'running', + 'timeout' +]) + +const normalizeSubagentStatus = (status: unknown, fallback: SubagentStatus): SubagentStatus => { + if (typeof status !== 'string') { + return fallback + } + + const normalized = status.toLowerCase() as SubagentStatus + + return KNOWN_SUBAGENT_STATUSES.has(normalized) ? normalized : fallback +} + export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: GatewayEvent) => void { const { rpc } = ctx.gateway const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session @@ -180,8 +200,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: // Terminal statuses are never overwritten by late-arriving live events — // otherwise a stale `subagent.start` / `spawn_requested` can clobber a - // `failed` or `interrupted` terminal state (Copilot review #14045). - const isTerminalStatus = (s: SubagentProgress['status']) => s === 'completed' || s === 'failed' || s === 'interrupted' + // terminal state from complete (failed/interrupted/timeout/error). + const isTerminalStatus = (s: SubagentProgress['status']) => + s === 'completed' || s === 'error' || s === 'failed' || s === 'interrupted' || s === 'timeout' const keepTerminalElseRunning = (s: SubagentProgress['status']) => (isTerminalStatus(s) ? s : 'running') @@ -648,7 +669,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: ev.payload, c => ({ durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds, - status: ev.payload.status ?? 
'completed', + status: normalizeSubagentStatus(ev.payload.status, 'completed'), summary: ev.payload.summary || ev.payload.text || c.summary }), { createIfMissing: false } diff --git a/ui-tui/src/app/spawnHistoryStore.ts b/ui-tui/src/app/spawnHistoryStore.ts index 9adb2b59cd0..ec36148403d 100644 --- a/ui-tui/src/app/spawnHistoryStore.ts +++ b/ui-tui/src/app/spawnHistoryStore.ts @@ -1,7 +1,7 @@ import { atom } from 'nanostores' import type { SpawnTreeLoadResponse } from '../gatewayTypes.js' -import type { SubagentProgress } from '../types.js' +import type { SubagentProgress, SubagentStatus } from '../types.js' export interface SpawnSnapshot { finishedAt: number @@ -21,6 +21,26 @@ export interface SpawnDiffPair { const HISTORY_LIMIT = 10 +const KNOWN_SUBAGENT_STATUSES = new Set([ + 'completed', + 'error', + 'failed', + 'interrupted', + 'queued', + 'running', + 'timeout' +]) + +const normalizeSubagentStatus = (status: unknown, fallback: SubagentStatus): SubagentStatus => { + if (typeof status !== 'string') { + return fallback + } + + const normalized = status.toLowerCase() as SubagentStatus + + return KNOWN_SUBAGENT_STATUSES.has(normalized) ? normalized : fallback +} + export const $spawnHistory = atom([]) export const $spawnDiff = atom(null) @@ -128,7 +148,7 @@ function normaliseSubagent(raw: unknown): SubagentProgress { parentId: s(o.parentId) ?? null, reasoningTokens: n(o.reasoningTokens), startedAt: n(o.startedAt), - status: (s(o.status) as SubagentProgress['status']) ?? 'completed', + status: normalizeSubagentStatus(o.status, 'completed'), summary: s(o.summary), taskCount: typeof o.taskCount === 'number' ? o.taskCount : 1, thinking: (arr(o.thinking) ?? 
[]).filter(x => typeof x === 'string'), diff --git a/ui-tui/src/components/agentsOverlay.tsx b/ui-tui/src/components/agentsOverlay.tsx index a1b349827cc..497230c3934 100644 --- a/ui-tui/src/components/agentsOverlay.tsx +++ b/ui-tui/src/components/agentsOverlay.tsx @@ -57,25 +57,33 @@ const FILTER_LABEL: Record = { } const STATUS_RANK: Record = { + error: 0, failed: 0, interrupted: 1, + timeout: 1, running: 2, queued: 3, completed: 4 } +const statusRank = (status: string): number => STATUS_RANK[status as Status] ?? STATUS_RANK.error + const SORT_COMPARATORS: Record number> = { 'depth-first': (a, b) => a.item.depth - b.item.depth || a.item.index - b.item.index, 'tools-desc': (a, b) => b.aggregate.totalTools - a.aggregate.totalTools, 'duration-desc': (a, b) => b.aggregate.totalDuration - a.aggregate.totalDuration, - status: (a, b) => STATUS_RANK[a.item.status] - STATUS_RANK[b.item.status] + status: (a, b) => statusRank(a.item.status) - statusRank(b.item.status) } const FILTER_PREDICATES: Record boolean> = { all: () => true, leaf: n => n.children.length === 0, running: n => n.item.status === 'running' || n.item.status === 'queued', - failed: n => n.item.status === 'failed' || n.item.status === 'interrupted' + failed: n => + n.item.status === 'error' || + n.item.status === 'failed' || + n.item.status === 'interrupted' || + n.item.status === 'timeout' } const STATUS_GLYPH: Record string; glyph: string }> = { @@ -83,7 +91,9 @@ const STATUS_GLYPH: Record string; glyph: string queued: { color: t => t.color.muted, glyph: '○' }, completed: { color: t => t.color.statusGood, glyph: '✓' }, interrupted: { color: t => t.color.warn, glyph: '■' }, - failed: { color: t => t.color.error, glyph: '✗' } + failed: { color: t => t.color.error, glyph: '✗' }, + timeout: { color: t => t.color.warn, glyph: '⌛' }, + error: { color: t => t.color.error, glyph: '⚠' } } // Heatmap palette — cold → hot, resolved against the active theme. 
@@ -111,7 +121,8 @@ const formatRowId = (n: number): string => String(n + 1).padStart(2, ' ') const cycle = (order: readonly T[], current: T): T => order[(order.indexOf(current) + 1) % order.length]! const statusGlyph = (item: SubagentProgress, t: Theme) => { - const g = STATUS_GLYPH[item.status] + // Defensive fallback for cross-version snapshots with unknown statuses. + const g = STATUS_GLYPH[item.status] ?? STATUS_GLYPH.error return { color: g.color(t), glyph: g.glyph } } diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx index 4204ff56a0f..6908795f621 100644 --- a/ui-tui/src/components/thinking.tsx +++ b/ui-tui/src/components/thinking.tsx @@ -327,7 +327,11 @@ function SubagentAccordion({ const aggregate = node.aggregate const statusTone: 'dim' | 'error' | 'warn' = - item.status === 'failed' ? 'error' : item.status === 'interrupted' ? 'warn' : 'dim' + item.status === 'error' || item.status === 'failed' + ? 'error' + : item.status === 'interrupted' || item.status === 'timeout' + ? 'warn' + : 'dim' const prefix = item.taskCount > 1 ? 
`[${item.index + 1}/${item.taskCount}] ` : '' const goalLabel = item.goal || `Subagent ${item.index + 1}` diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 8c5cb18b23d..ab85c39fbdd 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -1,4 +1,4 @@ -import type { SessionInfo, SlashCategory, Usage } from './types.js' +import type { SessionInfo, SlashCategory, SubagentStatus, Usage } from './types.js' export interface GatewaySkin { banner_hero?: string @@ -394,7 +394,7 @@ export interface SubagentEventPayload { output_tokens?: number parent_id?: null | string reasoning_tokens?: number - status?: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running' + status?: SubagentStatus subagent_id?: string summary?: string task_count?: number diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index 658b9cc13d2..62f580090d2 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -17,6 +17,8 @@ export interface ActivityItem { tone: 'error' | 'info' | 'warn' } +export type SubagentStatus = 'completed' | 'error' | 'failed' | 'interrupted' | 'queued' | 'running' | 'timeout' + export interface SubagentProgress { apiCalls?: number costUsd?: number @@ -36,7 +38,7 @@ export interface SubagentProgress { parentId: null | string reasoningTokens?: number startedAt?: number - status: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running' + status: SubagentStatus summary?: string taskCount: number thinking: string[] From 55c9f32060bbe7eb48bee2b702c157408b468eb2 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Sat, 16 May 2026 06:55:56 +0530 Subject: [PATCH 098/218] fix(tui): width-aware markdown table rendering with vertical fallback (#26195) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor(tui): thread cols through Md/StreamingMd/renderTable, update cache key * feat(tui): three-tier width calc + full-line string 
rendering in renderTable Replaces the old renderTable (L203-244) with: - Empty table guard - Ragged row normalization - Three-tier column width calculation (ideal → proportional shrink → hard scale) - Rounding remainder distribution - Full-line string rendering (one per row, not per cell) - wrap=truncate-end on all table lines - All cells rendered as plain text via stripInlineMarkup No wrapping or vertical fallback yet — those come in Phase 3 and 4. * feat(tui): wrapCell with grapheme-safe hard-break + multi-line row rendering Adds: - Intl.Segmenter-based grapheme splitting (fallback to [...word]) - wrapCell() for width-correct word wrapping on stripped text - Multi-line row rendering with LineEntry metadata (header/separator/body) - Post-render safety condition (maxLineWidth computed, vertical fallback in Task 4) - Non-wrapping path preserved for tables that fit at ideal widths * feat(tui): vertical key-value fallback with scaled threshold + safety check Wires: - Scaled row-height threshold (numCols<=3: 8, <=6: 5, else: 4) - Post-render safety check (maxLineWidth > available space) - Header-only edge case - Vertical format: bold headers, stripped cell text, clamped separator width - Iterates headers (not rows) for consistent key-value fields on ragged rows * test(tui): pass cols to Md in test helpers, add width-overflow assertions - renderAtWidth now passes cols={columns} to Md so width-aware code paths are exercised in tests - tableFuzz: every rendered line must fit within allocated width (stringWidth) - tableRepro: separator regex updated to match truncation ellipsis - stringWidth imported from @hermes/ink for CJK-correct assertions * fix(tui): address adversarial review — comment tier 3 budget overshoot, eliminate redundant wrapCell - Add comment on Tier 3 MIN_COL_WIDTH clamp exceeding budget (self-heals via safetyOverflow) - Track tallestBodyRow during allEntries build pass instead of re-wrapping every cell in a second traversal (eliminates O(cells) of redundant 
stripInlineMarkup+stringWidth) * fix(tui): pass cols to recursive fenced-markdown Md, fix test frame extraction - Thread cols into for fenced markdown blocks (L734) so nested tables use the width-aware renderer instead of max-content path - Fix renderAtWidth helpers to extract final Ink repaint frame instead of concatenating all intermediate frames (REPAINT_RE split) - Add fenced-markdown-table fixture to tableFuzz (exercises the nested path) * chore: remove repro test suites and tmux driver script These were scaffolding for development/reproduction — not needed in the PR. --- ui-tui/src/components/markdown.tsx | 325 +++++++++++++++++--- ui-tui/src/components/messageLine.tsx | 6 +- ui-tui/src/components/streamingMarkdown.tsx | 11 +- 3 files changed, 295 insertions(+), 47 deletions(-) diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index ae234eb9ec7..c215cd811bf 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -200,44 +200,288 @@ export const stripInlineMarkup = (v: string) => .replace(/(? { - // Column widths in *display cells*, not UTF-16 code units. CJK - // glyphs and most emoji render as two cells but `String#length` - // counts them as one, which collapses Chinese / Japanese / Korean - // tables into drift across rows. `stringWidth` (Bun.stringWidth - // fast path + an East-Asian-width-aware fallback, memoised in - // @hermes/ink) returns the actual cell count. - const cellWidth = (raw: string) => stringWidth(stripInlineMarkup(raw)) +const SAFETY_MARGIN = 4 +const MIN_COL_WIDTH = 3 +const COL_GAP = 2 // the ' ' between columns +const TABLE_PADDING_LEFT = 2 // paddingLeft={2} on the outer - const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => cellWidth(r[ci] ?? '')))) +const renderTable = (k: number, rows: string[][], t: Theme, cols?: number) => { + // Guard: empty table + if (rows.length === 0 || rows[0]!.length === 0) return null - // Thin divider under the header. 
Without it tables look like prose - // with extra spacing because the header is just accent-coloured text - // (#15534). We avoid full borders on purpose — column widths come - // from `stringWidth(...)`, so the dividers and the row content stay - // in sync on CJK / emoji tables; tab-style column gaps still read - // cleanly without the boxed look. - const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ') + const cellDisplayWidth = (raw: string) => stringWidth(stripInlineMarkup(raw)) - return ( - - {rows.map((row, ri) => ( - - - {widths.map((w, ci) => ( - - - {' '.repeat(Math.max(0, w - cellWidth(row[ci] ?? '')))} - {ci < widths.length - 1 ? ' ' : ''} - - ))} - - {ri === 0 && rows.length > 1 ? ( - - {sep} + // Minimum width: longest word in a cell (to avoid breaking words) + const minCellWidth = (raw: string) => { + const text = stripInlineMarkup(raw) + const words = text.split(/\s+/).filter(w => w.length > 0) + if (words.length === 0) return MIN_COL_WIDTH + return Math.max(...words.map(w => stringWidth(w)), MIN_COL_WIDTH) + } + + const numCols = rows[0]!.length + + // Normalize ragged rows: ensure every row has exactly numCols cells + const normalizedRows = rows.map(row => { + if (row.length >= numCols) return row.slice(0, numCols) + return [...row, ...Array(numCols - row.length).fill('')] + }) + + // Ideal widths: max cell content per column + const idealWidths = normalizedRows[0]!.map((_, ci) => + Math.max(...normalizedRows.map(r => cellDisplayWidth(r[ci] ?? '')), MIN_COL_WIDTH) + ) + + // Min widths: longest word per column + const minWidths = normalizedRows[0]!.map((_, ci) => + Math.max(...normalizedRows.map(r => minCellWidth(r[ci] ?? '')), MIN_COL_WIDTH) + ) + + // Available width: cols minus table padding minus column gaps minus safety. + // transcriptBodyWidth (source of cols) subtracts message gutter + scrollbar, + // but NOT this table's paddingLeft — we subtract it here. 
+ const gapOverhead = (numCols - 1) * COL_GAP + const availableWidth = cols + ? Math.max(cols - TABLE_PADDING_LEFT - gapOverhead - SAFETY_MARGIN, numCols * MIN_COL_WIDTH) + : Infinity + + const totalIdeal = idealWidths.reduce((a, b) => a + b, 0) + const totalMin = minWidths.reduce((a, b) => a + b, 0) + + let columnWidths: number[] + let needsWrap = false + + if (totalIdeal <= availableWidth) { + // Tier 1: everything fits at ideal widths + columnWidths = idealWidths + } else if (totalMin <= availableWidth) { + // Tier 2: proportional shrink — distribute extra space beyond minimums + needsWrap = true + const extraSpace = availableWidth - totalMin + const overflows = idealWidths.map((ideal, i) => ideal - minWidths[i]!) + const totalOverflow = overflows.reduce((a, b) => a + b, 0) + if (totalOverflow === 0) { + columnWidths = [...minWidths] + } else { + const rawAlloc = minWidths.map((min, i) => + min + (overflows[i]! / totalOverflow) * extraSpace + ) + columnWidths = rawAlloc.map(v => Math.floor(v)) + // Distribute rounding remainders to columns with largest fractional part + let remainder = availableWidth - columnWidths.reduce((a, b) => a + b, 0) + const fracs = rawAlloc.map((v, i) => ({ i, frac: v - Math.floor(v) })) + .sort((a, b) => b.frac - a.frac) + for (const { i } of fracs) { + if (remainder <= 0) break + columnWidths[i]!++ + remainder-- + } + } + } else { + // Tier 3: even min-widths don't fit — scale proportionally, allow hard breaks. + // NOTE: Math.max(..., MIN_COL_WIDTH) can push total above availableWidth when + // many columns are scaled below 3. This is caught by safetyOverflow → vertical fallback. 
+ needsWrap = true + const scaleFactor = availableWidth / totalMin + const rawAlloc = minWidths.map(w => w * scaleFactor) + columnWidths = rawAlloc.map(v => Math.max(Math.floor(v), MIN_COL_WIDTH)) + let remainder = availableWidth - columnWidths.reduce((a, b) => a + b, 0) + const fracs = rawAlloc.map((v, i) => ({ i, frac: v - Math.floor(v) })) + .sort((a, b) => b.frac - a.frac) + for (const { i } of fracs) { + if (remainder <= 0) break + columnWidths[i]!++ + remainder-- + } + } + + // Grapheme-safe hard-break: prefer Intl.Segmenter, fall back to code-point split + const segmenter = typeof Intl !== 'undefined' && 'Segmenter' in Intl + ? new (Intl as any).Segmenter(undefined, { granularity: 'grapheme' }) + : null + + const graphemes = (s: string): string[] => + segmenter + ? [...segmenter.segment(s)].map((seg: { segment: string }) => seg.segment) + : [...s] + + // Word-wrap plain text to fit within `width` display columns. + // Operates on stripped text for correct width measurement. + const wrapCell = (raw: string, width: number, hard: boolean): string[] => { + const text = stripInlineMarkup(raw) + if (width <= 0) return [text] + if (stringWidth(text) <= width) return [text] + + const words = text.split(/\s+/).filter(w => w.length > 0) + const lines: string[] = [] + let current = '' + let currentWidth = 0 + + for (const word of words) { + const w = stringWidth(word) + if (currentWidth === 0) { + if (hard && w > width) { + for (const ch of graphemes(word)) { + const cw = stringWidth(ch) + if (currentWidth + cw > width && current) { + lines.push(current) + current = '' + currentWidth = 0 + } + current += ch + currentWidth += cw + } + } else { + current = word + currentWidth = w + } + } else if (currentWidth + 1 + w <= width) { + current += ' ' + word + currentWidth += 1 + w + } else { + lines.push(current) + current = word + currentWidth = w + } + } + if (current) lines.push(current) + return lines.length > 0 ? 
lines : [''] + } + + const isHard = totalMin > availableWidth // tier 3 needs hard word breaks + const sep = columnWidths.map(w => '─'.repeat(Math.max(1, w))).join(' ') + + // When wrapping isn't needed, build single-line strings per row. + // All cells render as plain text via stripInlineMarkup. + // TODO: follow-up — format to ANSI then wrap with wrapAnsi for inline markdown preservation. + // See free-code/src/components/MarkdownTable.tsx L44-L62 for approach. + if (!needsWrap) { + const buildRowString = (row: string[]): string => + row.map((cell, ci) => { + const text = stripInlineMarkup(cell) + const pad = ' '.repeat(Math.max(0, columnWidths[ci]! - stringWidth(text))) + const gap = ci < numCols - 1 ? ' ' : '' + return text + pad + gap + }).join('') + + return ( + + {normalizedRows.map((row, ri) => ( + + + {buildRowString(row)} - ) : null} - + {ri === 0 && normalizedRows.length > 1 ? ( + {sep} + ) : null} + + ))} + + ) + } + + // Wrapping path: build multi-line rows as complete strings. + type LineEntry = { text: string; kind: 'header' | 'separator' | 'body' } + + const buildRowLines = (row: string[]): string[] => { + const cellLines = row.map((cell, ci) => + wrapCell(cell, columnWidths[ci]!, isHard) + ) + const maxLines = Math.max(...cellLines.map(l => l.length), 1) + + const result: string[] = [] + for (let li = 0; li < maxLines; li++) { + let line = '' + for (let ci = 0; ci < numCols; ci++) { + const cl = cellLines[ci] ?? [''] + const cellText = li < cl.length ? cl[li]! : '' + const pad = ' '.repeat(Math.max(0, columnWidths[ci]! - stringWidth(cellText))) + line += cellText + pad + if (ci < numCols - 1) line += ' ' + } + result.push(line) + } + return result + } + + // Build all lines with metadata for styling, tracking tallest body row + const allEntries: LineEntry[] = [] + let tallestBodyRow = 0 + normalizedRows.forEach((row, ri) => { + const kind = ri === 0 ? 
'header' as const : 'body' as const + const rowLines = buildRowLines(row) + rowLines.forEach(text => allEntries.push({ text, kind })) + if (ri > 0) tallestBodyRow = Math.max(tallestBodyRow, rowLines.length) + if (ri === 0 && normalizedRows.length > 1) { + allEntries.push({ text: sep, kind: 'separator' }) + } + }) + + // Post-render safety condition: compute max line width. + const maxLineWidth = Math.max(...allEntries.map(e => stringWidth(e.text))) + const safetyOverflow = cols != null && maxLineWidth > cols - TABLE_PADDING_LEFT - SAFETY_MARGIN + + // Scaled vertical threshold — 2-3 col tables stay tabular even with tall cells + const maxRowLinesThreshold = numCols <= 3 ? 8 : numCols <= 6 ? 5 : 4 + + const useVertical = tallestBodyRow > maxRowLinesThreshold || safetyOverflow + + if (useVertical) { + // Edge case: header-only table + if (normalizedRows.length <= 1) { + return ( + + + {normalizedRows[0]!.map(h => stripInlineMarkup(h)).join(' · ')} + + + ) + } + + const headers = normalizedRows[0]! + const dataRows = normalizedRows.slice(1) + const sepWidth = Math.max(1, cols ? Math.min(cols - TABLE_PADDING_LEFT - 1, 40) : 40) + + return ( + + {dataRows.map((row, ri) => ( + + {ri > 0 ? ( + {'─'.repeat(sepWidth)} + ) : null} + {headers.map((header, ci) => { + const cell = row[ci] ?? '' + const label = stripInlineMarkup(header) || `Col ${ci + 1}` + return ( + + {label}: + {' '}{stripInlineMarkup(cell)} + + ) + })} + + ))} + + ) + } + + // Render wrapped horizontal rows — one per visual line. + return ( + + {allEntries.map((entry, i) => ( + + {entry.text} + ))} ) @@ -395,10 +639,10 @@ const cacheSet = (b: Map, key: string, v: ReactNode[]) => { } } -function MdImpl({ compact, t, text }: MdProps) { +function MdImpl({ cols, compact, t, text }: MdProps) { const nodes = useMemo(() => { const bucket = cacheBucket(t) - const cacheKey = `${compact ? '1' : '0'}|${text}` + const cacheKey = `${compact ? '1' : '0'}|${cols ?? 
''}|${text}` const cached = cacheGet(bucket, cacheKey) if (cached) { @@ -490,7 +734,7 @@ function MdImpl({ compact, t, text }: MdProps) { if (['md', 'markdown'].includes(lang)) { start('paragraph') - nodes.push() + nodes.push() continue } @@ -785,7 +1029,7 @@ function MdImpl({ compact, t, text }: MdProps) { rows.push(splitRow(lines[i]!)) } - nodes.push(renderTable(key, rows, t)) + nodes.push(renderTable(key, rows, t, cols)) continue } @@ -838,7 +1082,7 @@ function MdImpl({ compact, t, text }: MdProps) { } if (rows.length) { - nodes.push(renderTable(key, rows, t)) + nodes.push(renderTable(key, rows, t, cols)) } continue @@ -852,7 +1096,7 @@ function MdImpl({ compact, t, text }: MdProps) { cacheSet(bucket, cacheKey, nodes) return nodes - }, [compact, t, text]) + }, [cols, compact, t, text]) return {nodes} } @@ -862,6 +1106,7 @@ export const Md = memo(MdImpl) type Kind = 'blank' | 'code' | 'heading' | 'list' | 'paragraph' | 'quote' | 'rule' | 'table' | null interface MdProps { + cols?: number compact?: boolean t: Theme text: string diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx index 950b61b4d72..238b551ae97 100644 --- a/ui-tui/src/components/messageLine.tsx +++ b/ui-tui/src/components/messageLine.tsx @@ -139,13 +139,15 @@ export const MessageLine = memo(function MessageLine({ } if (msg.role === 'assistant') { + const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt) + return isStreaming ? ( // Incremental markdown: split at the last stable block boundary so // only the in-flight tail re-tokenizes per delta. See // streamingMarkdown.tsx for the cost model. 
- + ) : ( - + ) } diff --git a/ui-tui/src/components/streamingMarkdown.tsx b/ui-tui/src/components/streamingMarkdown.tsx index 1be70b283a8..786a3812461 100644 --- a/ui-tui/src/components/streamingMarkdown.tsx +++ b/ui-tui/src/components/streamingMarkdown.tsx @@ -128,7 +128,7 @@ export const findStableBoundary = (text: string) => { return -1 } -export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) { +export const StreamingMd = memo(function StreamingMd({ cols, compact, t, text }: StreamingMdProps) { const stablePrefixRef = useRef('') // Reset if the text no longer starts with our recorded prefix (defensive; @@ -151,22 +151,23 @@ export const StreamingMd = memo(function StreamingMd({ compact, t, text }: Strea const unstableSuffix = text.slice(stablePrefix.length) if (!stablePrefix) { - return + return } if (!unstableSuffix) { - return + return } return ( - - + + ) }) interface StreamingMdProps { + cols?: number compact?: boolean t: Theme text: string From 86a368d8322b3977bf89b9043818eebc6adf470b Mon Sep 17 00:00:00 2001 From: emozilla Date: Fri, 15 May 2026 22:14:41 -0400 Subject: [PATCH 099/218] remove pip installation method from docs --- website/docs/getting-started/installation.md | 25 -------------------- 1 file changed, 25 deletions(-) diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index a88f4c8bd1c..10420d8df55 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -10,31 +10,6 @@ Get Hermes Agent up and running in under two minutes with the one-line installer ## Quick Install -### pip (recommended for most users) - -```bash -pip install hermes-agent -``` - -This gives you the full Hermes Agent — CLI, web dashboard, and TUI — with zero external dependencies for core usage. Node.js, browser engines, and other optional tools are bootstrapped lazily on first use (e.g. 
when you run `hermes --tui` or use browser tools). - -PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. If you want bleeding-edge changes as they land, use the git install below. - -After installing, run: - -```bash -hermes setup # interactive wizard — configures your LLM provider and API key -hermes # start chatting -``` - -:::tip Optional: install everything upfront -`hermes postinstall` installs Node.js, browser engines, ripgrep, and ffmpeg in one shot — then runs the setup wizard. Use this if you want the full experience (TUI, browser tools, voice) without waiting for lazy installs on first use. -::: - -:::tip -If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster. -::: - ### One-Line Installer (Linux / macOS / WSL2) For a git-based install that tracks `main` and gives you the latest changes immediately: From 63503ebb14069e8ba0bea91955e7ce4e01670a4e Mon Sep 17 00:00:00 2001 From: Austin Pickett Date: Fri, 15 May 2026 22:40:21 -0400 Subject: [PATCH 100/218] fix(dashboard): clarify Kanban Ready vs assignment Ready column help and fallbacks now describe dependency-ready work; show a badge on unassigned ready cards and fix the stale unassigned tooltip. Align localized Ready help strings with the new semantics. 
Co-authored-by: Cursor --- plugins/kanban/dashboard/dist/index.js | 14 ++++++++++++-- plugins/kanban/dashboard/dist/style.css | 8 ++++++++ web/src/i18n/af.ts | 2 +- web/src/i18n/de.ts | 2 +- web/src/i18n/en.ts | 5 ++++- web/src/i18n/es.ts | 2 +- web/src/i18n/fr.ts | 2 +- web/src/i18n/ga.ts | 2 +- web/src/i18n/hu.ts | 2 +- web/src/i18n/it.ts | 2 +- web/src/i18n/ja.ts | 2 +- web/src/i18n/ko.ts | 2 +- web/src/i18n/pt.ts | 2 +- web/src/i18n/ru.ts | 2 +- web/src/i18n/tr.ts | 2 +- web/src/i18n/types.ts | 2 ++ web/src/i18n/zh-hant.ts | 2 +- web/src/i18n/zh.ts | 2 +- 18 files changed, 40 insertions(+), 17 deletions(-) diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index 720cdb9e1e2..6f05df72bf6 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -68,7 +68,7 @@ const FALLBACK_COLUMN_HELP = { triage: "Raw ideas — a specifier will flesh out the spec", todo: "Waiting on dependencies or unassigned", - ready: "Assigned and waiting for a dispatcher tick", + ready: "Dependencies satisfied; assign a profile to dispatch", running: "Claimed by a worker — in-flight", blocked: "Worker asked for human input", done: "Completed", @@ -2048,6 +2048,7 @@ }; const progress = t.progress; + const needsAssignee = t.status === "ready" && !t.assignee; return h("div", { ref: cardRef, @@ -2118,6 +2119,13 @@ title: `${progress.done} of ${progress.total} child tasks done`, }, `${progress.done}/${progress.total}`) : null, + needsAssignee + ? h(Badge, { + variant: "outline", + className: "hermes-kanban-needs-assignee", + title: tx(i18n, "needsAssigneeHint", "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile."), + }, tx(i18n, "needsAssignee", "Needs assignee")) + : null, ), h("div", { className: "hermes-kanban-card-title" }, t.title || tx(i18n, "untitled", "(untitled)")), @@ -2126,7 +2134,9 @@ ? 
h("span", { className: "hermes-kanban-assignee", title: `Assigned to Hermes profile @${t.assignee}` }, "@", t.assignee) : h("span", { className: "hermes-kanban-unassigned", - title: "No profile assigned. The dispatcher will pick one from available profiles when the task is Ready." }, + title: needsAssignee + ? tx(i18n, "needsAssigneeHint", "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile.") + : "No profile assigned." }, tx(i18n, "unassigned", "unassigned")), t.comment_count > 0 ? h("span", { className: "hermes-kanban-count", diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css index 3bcfccb289b..f3d66a88597 100644 --- a/plugins/kanban/dashboard/dist/style.css +++ b/plugins/kanban/dashboard/dist/style.css @@ -280,6 +280,14 @@ padding: 0.05rem 0.3rem !important; } +.hermes-kanban-needs-assignee { + font-size: 0.6rem !important; + padding: 0.05rem 0.3rem !important; + background: color-mix(in srgb, var(--color-warning, #d4b348) 16%, transparent); + border-color: color-mix(in srgb, var(--color-warning, #d4b348) 45%, var(--color-border)); + color: var(--color-foreground); +} + .hermes-kanban-assignee { font-weight: 500; color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground)); diff --git a/web/src/i18n/af.ts b/web/src/i18n/af.ts index 4f49eb12227..e588a63596d 100644 --- a/web/src/i18n/af.ts +++ b/web/src/i18n/af.ts @@ -663,7 +663,7 @@ export const af: Translations = { columnHelp: { triage: "Rou idees — 'n spesifiseerder sal die spesifikasie uitwerk", todo: "Wag op afhanklikhede of nie toegewys nie", - ready: "Toegewys en wag vir 'n versender-tik", + ready: "Afhanklikhede is bevredig; wys 'n profiel toe om te versend", running: "Deur 'n werker geëis — in vlug", blocked: "Werker het mensinvoer aangevra", done: "Voltooi", diff --git a/web/src/i18n/de.ts b/web/src/i18n/de.ts index c70ccfe8701..28a9b59deff 100644 --- a/web/src/i18n/de.ts +++ b/web/src/i18n/de.ts 
@@ -662,7 +662,7 @@ export const de: Translations = { columnHelp: { triage: "Rohe Ideen — ein Specifier wird die Spezifikation ausarbeiten", todo: "Wartet auf Abhängigkeiten oder ist nicht zugewiesen", - ready: "Zugewiesen und wartet auf einen Dispatcher-Tick", + ready: "Abhängigkeiten erfüllt; Profil zum Dispatch zuweisen", running: "Von einem Worker übernommen — in Bearbeitung", blocked: "Worker hat um menschliche Eingabe gebeten", done: "Abgeschlossen", diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index e93fdac7ec4..5eae3f9a14a 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -574,6 +574,9 @@ export const en: Translations = { createTask: "Create task in this column", noTasks: "— no tasks —", unassigned: "unassigned", + needsAssignee: "Needs assignee", + needsAssigneeHint: + "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile.", untitled: "(untitled)", loadingDetail: "Loading…", addComment: "Add a comment… (Enter to submit)", @@ -664,7 +667,7 @@ export const en: Translations = { columnHelp: { triage: "Raw ideas — a specifier will flesh out the spec", todo: "Waiting on dependencies or unassigned", - ready: "Assigned and waiting for a dispatcher tick", + ready: "Dependencies satisfied; assign a profile to dispatch", running: "Claimed by a worker — in-flight", blocked: "Worker asked for human input", done: "Completed", diff --git a/web/src/i18n/es.ts b/web/src/i18n/es.ts index 19088de12c8..139a8175d44 100644 --- a/web/src/i18n/es.ts +++ b/web/src/i18n/es.ts @@ -662,7 +662,7 @@ export const es: Translations = { columnHelp: { triage: "Ideas en bruto — un specifier desarrollará la especificación", todo: "Esperando dependencias o sin asignar", - ready: "Asignado y esperando un tick del dispatcher", + ready: "Dependencias satisfechas; asigna un perfil para despachar", running: "Reclamado por un worker — en ejecución", blocked: "El worker pidió intervención humana", done: "Completado", diff --git 
a/web/src/i18n/fr.ts b/web/src/i18n/fr.ts index 4532cab3ee0..51b5ba54f12 100644 --- a/web/src/i18n/fr.ts +++ b/web/src/i18n/fr.ts @@ -662,7 +662,7 @@ export const fr: Translations = { columnHelp: { triage: "Idées brutes — un specifier rédigera la spécification", todo: "En attente de dépendances ou non assigné", - ready: "Assigné et en attente d'un tick du dispatcher", + ready: "Dépendances satisfaites ; assignez un profil pour dispatch", running: "Réclamé par un worker — en cours d'exécution", blocked: "Le worker a demandé une intervention humaine", done: "Terminé", diff --git a/web/src/i18n/ga.ts b/web/src/i18n/ga.ts index d75ec061b8b..4dc4e823430 100644 --- a/web/src/i18n/ga.ts +++ b/web/src/i18n/ga.ts @@ -663,7 +663,7 @@ export const ga: Translations = { columnHelp: { triage: "Smaointe amha — déanfaidh specifier an spec a chur i bhfeidhm", todo: "Ag fanacht ar spleáchais nó gan sannadh", - ready: "Sannta agus ag fanacht ar thic an dispatcher", + ready: "Tá na spleáchais sásaithe; sann próifíl le dispatch a dhéanamh", running: "Éilithe ag worker — ar siúl", blocked: "D'iarr an worker ionchur duine", done: "Críochnaithe", diff --git a/web/src/i18n/hu.ts b/web/src/i18n/hu.ts index f563c1dacc4..8b492f3bb16 100644 --- a/web/src/i18n/hu.ts +++ b/web/src/i18n/hu.ts @@ -663,7 +663,7 @@ export const hu: Translations = { columnHelp: { triage: "Nyers ötletek — egy specifier kidolgozza a specifikációt", todo: "Függőségekre vár vagy nincs felelőse", - ready: "Kiosztva, dispatcher tickre vár", + ready: "A függőségek teljesültek; rendelj hozzá profilt az indításhoz", running: "Worker felvette — folyamatban", blocked: "A worker emberi beavatkozást kért", done: "Befejezve", diff --git a/web/src/i18n/it.ts b/web/src/i18n/it.ts index 5e79d3115c3..86fce86589e 100644 --- a/web/src/i18n/it.ts +++ b/web/src/i18n/it.ts @@ -662,7 +662,7 @@ export const it: Translations = { columnHelp: { triage: "Idee grezze — un specifier elaborerà la specifica", todo: "In attesa di dipendenze o non 
assegnato", - ready: "Assegnato e in attesa di un tick del dispatcher", + ready: "Dipendenze soddisfatte; assegna un profilo per il dispatch", running: "Preso in carico da un worker — in esecuzione", blocked: "Il worker ha richiesto input umano", done: "Completato", diff --git a/web/src/i18n/ja.ts b/web/src/i18n/ja.ts index 175468e4d8b..154e11f5dbb 100644 --- a/web/src/i18n/ja.ts +++ b/web/src/i18n/ja.ts @@ -663,7 +663,7 @@ export const ja: Translations = { columnHelp: { triage: "未整理のアイデア — スペシファイアが仕様を肉付けします", todo: "依存関係の待機中、または未割り当て", - ready: "割り当て済み、ディスパッチャーのティック待ち", + ready: "依存関係は満たされています。ディスパッチするにはプロファイルを割り当ててください", running: "ワーカーが取得中 — 実行中", blocked: "ワーカーが人間の入力を求めています", done: "完了", diff --git a/web/src/i18n/ko.ts b/web/src/i18n/ko.ts index cfc40d63df7..4dafaeb9cde 100644 --- a/web/src/i18n/ko.ts +++ b/web/src/i18n/ko.ts @@ -663,7 +663,7 @@ export const ko: Translations = { columnHelp: { triage: "원시 아이디어 — 스페시파이어가 사양을 구체화합니다", todo: "종속성 대기 중 또는 미지정", - ready: "지정되었으며 디스패처 틱 대기 중", + ready: "종속성이 충족됨; 디스패치하려면 프로필을 지정하세요", running: "워커가 점유 중 — 실행 중", blocked: "워커가 사람의 입력을 요청함", done: "완료됨", diff --git a/web/src/i18n/pt.ts b/web/src/i18n/pt.ts index 6cdd40b8fe5..d32402dc92a 100644 --- a/web/src/i18n/pt.ts +++ b/web/src/i18n/pt.ts @@ -663,7 +663,7 @@ export const pt: Translations = { columnHelp: { triage: "Ideias em bruto — um specifier vai detalhar a especificação", todo: "À espera de dependências ou sem atribuição", - ready: "Atribuído e à espera de um tick do dispatcher", + ready: "Dependências satisfeitas; atribua um perfil para despachar", running: "Reivindicado por um worker — em execução", blocked: "O worker pediu intervenção humana", done: "Concluído", diff --git a/web/src/i18n/ru.ts b/web/src/i18n/ru.ts index c5b9a5b5038..79a6961b251 100644 --- a/web/src/i18n/ru.ts +++ b/web/src/i18n/ru.ts @@ -663,7 +663,7 @@ export const ru: Translations = { columnHelp: { triage: "Сырые идеи — specifier подготовит спецификацию", todo: "Ожидает зависимостей или без 
исполнителя", - ready: "Назначено и ждёт тика диспетчера", + ready: "Зависимости выполнены; назначьте профиль для диспетчеризации", running: "Взято воркером — выполняется", blocked: "Воркер запросил вмешательство человека", done: "Завершено", diff --git a/web/src/i18n/tr.ts b/web/src/i18n/tr.ts index 7de6ea1df7d..56670424abb 100644 --- a/web/src/i18n/tr.ts +++ b/web/src/i18n/tr.ts @@ -663,7 +663,7 @@ export const tr: Translations = { columnHelp: { triage: "Ham fikirler — bir specifier şartnameyi detaylandıracak", todo: "Bağımlılıklar bekleniyor veya atanmamış", - ready: "Atanmış ve dispatcher tick'i bekleniyor", + ready: "Bağımlılıklar karşılandı; dispatch için bir profil atayın", running: "Bir worker tarafından alındı — yürütülüyor", blocked: "Worker insan girdisi istedi", done: "Tamamlandı", diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts index ca40b4a381f..55669a4b679 100644 --- a/web/src/i18n/types.ts +++ b/web/src/i18n/types.ts @@ -586,6 +586,8 @@ export interface Translations { createTask: string; noTasks: string; unassigned: string; + needsAssignee?: string; + needsAssigneeHint?: string; untitled: string; loadingDetail: string; addComment: string; diff --git a/web/src/i18n/zh-hant.ts b/web/src/i18n/zh-hant.ts index c79222cfe91..27f3a41b95f 100644 --- a/web/src/i18n/zh-hant.ts +++ b/web/src/i18n/zh-hant.ts @@ -663,7 +663,7 @@ export const zhHant: Translations = { columnHelp: { triage: "原始想法 — 規格制定者將完善規格", todo: "等待相依項目或尚未指派", - ready: "已指派,等待排程器輪詢", + ready: "相依項目已滿足;指派設定檔以便排程", running: "已被工作者領取 — 執行中", blocked: "工作者請求人工輸入", done: "已完成", diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index 0a8ceb7962a..6290c473b82 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -659,7 +659,7 @@ export const zh: Translations = { columnHelp: { triage: "原始想法 — 规范制定者将完善规格", todo: "等待依赖项或未分配", - ready: "已分配,等待调度器轮询", + ready: "依赖项已满足;分配一个配置文件以便调度", running: "已被工作者认领 — 执行中", blocked: "工作者请求人工输入", done: "已完成", From 
97a32afdc490e3d40b291dac0e67f291502052a0 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Fri, 15 May 2026 18:43:39 -0600 Subject: [PATCH 101/218] fix(auxiliary): resolve xai oauth compression from pool --- agent/auxiliary_client.py | 38 +++++++++++++-- run_agent.py | 19 ++++++-- tests/agent/test_auxiliary_client.py | 72 ++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 1c7dd9f7497..cfc44e5f2a6 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1272,12 +1272,40 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[ def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]: """Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients. - Routes through ``hermes_cli.auth``'s runtime resolver so the auto-refresh - path is shared with the main agent, instead of relying on whatever raw - tokens happen to be sitting in auth.json or the credential pool. Returns - ``None`` if the user is not authenticated with xAI Grok OAuth (so - ``_resolve_auto`` Step 1 falls through to the next provider in the chain). + Prefer the credential pool, matching the main runtime/provider status + path. Some xAI OAuth logins live only as pool entries; falling straight + to the singleton auth-store resolver would make auxiliary tasks such as + compression report "no provider configured" even though ``hermes auth + status`` shows xAI OAuth as logged in. + + Falls back to ``hermes_cli.auth``'s singleton runtime resolver for older + auth-store-only logins. Returns ``None`` if the user is not authenticated + with xAI Grok OAuth. 
""" + try: + from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL + + pool = load_pool("xai-oauth") + if pool and pool.has_credentials(): + entry = pool.select() + if entry is not None: + api_key = str( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "access_token", "") + or "" + ).strip() + base_url = str( + os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/") + or os.getenv("XAI_BASE_URL", "").strip().rstrip("/") + or getattr(entry, "runtime_base_url", None) + or getattr(entry, "base_url", None) + or DEFAULT_XAI_OAUTH_BASE_URL + ).strip().rstrip("/") + if api_key and base_url: + return api_key, base_url + except Exception as exc: + logger.debug("Auxiliary xAI OAuth pool credential resolution failed: %s", exc) + try: from hermes_cli.auth import resolve_xai_oauth_runtime_credentials diff --git a/run_agent.py b/run_agent.py index 85c1128d68e..b3cde9eb1ea 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3237,11 +3237,20 @@ class AIAgent: except Exception: _aux_cfg_provider = "" if client is None or not aux_model: - msg = ( - "⚠ No auxiliary LLM provider configured — context " - "compression will drop middle turns without a summary. " - "Run `hermes setup` or set OPENROUTER_API_KEY." - ) + if _aux_cfg_provider and _aux_cfg_provider != "auto": + msg = ( + "⚠ Configured auxiliary compression provider " + f"'{_aux_cfg_provider}' is unavailable — context " + "compression will drop middle turns without a summary. " + "Check auxiliary.compression in config.yaml and " + "reauthenticate that provider." + ) + else: + msg = ( + "⚠ No auxiliary LLM provider configured — context " + "compression will drop middle turns without a summary. " + "Run `hermes setup` or set OPENROUTER_API_KEY." 
+ ) self._compression_warning = msg self._emit_status(msg) logger.warning( diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 9dd85762956..96f5802f839 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -26,6 +26,7 @@ from agent.auxiliary_client import ( _normalize_aux_provider, _try_payment_fallback, _resolve_auto, + _resolve_xai_oauth_for_aux, _CodexCompletionsAdapter, ) @@ -221,6 +222,77 @@ class TestReadCodexAccessToken: assert result == "plain-token-no-jwt" +class TestResolveXaiOAuthForAux: + def test_uses_pool_backed_credentials_without_singleton(self, tmp_path, monkeypatch): + """Auxiliary xAI OAuth must see pool-only credentials. + + ``hermes auth status`` already reports these as logged in; compression + should not fall through to "no auxiliary provider configured" just + because the singleton auth-store entry is absent. + """ + from agent.credential_pool import AUTH_TYPE_OAUTH, PooledCredential, load_pool + from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + })) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False) + monkeypatch.delenv("XAI_BASE_URL", raising=False) + + pool = load_pool("xai-oauth") + pool.add_entry(PooledCredential( + provider="xai-oauth", + id="xai123", + label="pool-only", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token="pool-access-token", + refresh_token="pool-refresh-token", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + )) + + assert _resolve_xai_oauth_for_aux() == ( + "pool-access-token", + DEFAULT_XAI_OAUTH_BASE_URL, + ) + + def test_pool_backed_credentials_honor_base_url_env_override(self, tmp_path, monkeypatch): + from agent.credential_pool import AUTH_TYPE_OAUTH, 
PooledCredential, load_pool + from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + })) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("HERMES_XAI_BASE_URL", "https://example.x.ai/v1/") + + pool = load_pool("xai-oauth") + pool.add_entry(PooledCredential( + provider="xai-oauth", + id="xai456", + label="pool-only", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="manual:xai_pkce", + access_token="pool-access-token", + refresh_token="pool-refresh-token", + base_url=DEFAULT_XAI_OAUTH_BASE_URL, + )) + + assert _resolve_xai_oauth_for_aux() == ( + "pool-access-token", + "https://example.x.ai/v1", + ) + + class TestAnthropicOAuthFlag: """Test that OAuth tokens get is_oauth=True in auxiliary Anthropic client.""" From 44b63fc6de3fe2b53eac3109b4a20db41c663195 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Fri, 15 May 2026 21:59:28 -0500 Subject: [PATCH 102/218] fix(tui): allow transcript scroll + Esc during approval/clarify/confirm prompts (#26414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an approval / clarify / confirm overlay was active, the global input handler in useInputHandlers returned for every key that wasn't Ctrl+C, which silently disabled transcript scrolling. On long threads the context the prompt was asking about often lived above the visible viewport, and being unable to scroll while answering felt like the prompt had locked the UI. ApprovalPrompt also had no Esc handler at all, so the one obvious 'abort' key did nothing during a permission prompt and the user had to memorize Ctrl+C or hunt for the deny number. Fixes: - Extract shouldFallThroughForScroll(key) (pure, exported) covering wheel scrolls, PageUp/PageDown, and Shift+ArrowUp/Down. 
When a prompt overlay is up and the pressed key is a scroll input, skip the early return so it reaches the existing wheel/PageUp/Shift+arrow handlers below. Plain arrows still drive in-prompt selection — they don't fall through. - ApprovalPrompt now maps Esc to onChoice('deny'), parity with the global Ctrl+C cancellation path that already invokes cancelOverlayFromCtrlC() for approvals. The bottom-of-prompt hint now advertises 'Esc/Ctrl+C deny'. - Extract approvalAction(ch, key, sel) — pure key-dispatch helper for the approval prompt, exported so the regression matrix (Esc, numbers, Enter, arrows, edge clamping, precedence) is testable without mounting Ink. Tests: - useInputHandlers.test.ts: 6 cases covering shouldFallThroughForScroll positives (wheel/PageUp/PageDown/Shift+arrows) and negatives (plain arrows, bare shift, no scroll key). - approvalAction.test.ts: 8 cases covering Esc→deny, numeric mapping, Enter, ↑↓ within bounds, edge clamping, Esc-beats-others precedence, unrelated keystrokes. 
--- ui-tui/src/__tests__/approvalAction.test.ts | 50 +++++++++++++ ui-tui/src/__tests__/useInputHandlers.test.ts | 42 ++++++++++- ui-tui/src/app/useInputHandlers.ts | 57 ++++++++++++++- ui-tui/src/components/prompts.tsx | 73 ++++++++++++++----- 4 files changed, 201 insertions(+), 21 deletions(-) create mode 100644 ui-tui/src/__tests__/approvalAction.test.ts diff --git a/ui-tui/src/__tests__/approvalAction.test.ts b/ui-tui/src/__tests__/approvalAction.test.ts new file mode 100644 index 00000000000..851b5093448 --- /dev/null +++ b/ui-tui/src/__tests__/approvalAction.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from 'vitest' + +import { approvalAction } from '../components/prompts.js' + +describe('approvalAction — pure key dispatch for ApprovalPrompt', () => { + it('maps Esc to deny — parity with global Ctrl+C cancellation', () => { + expect(approvalAction('', { escape: true }, 0)).toEqual({ kind: 'choose', choice: 'deny' }) + expect(approvalAction('', { escape: true }, 2)).toEqual({ kind: 'choose', choice: 'deny' }) + }) + + it('maps number keys 1..4 to once/session/always/deny in registration order', () => { + expect(approvalAction('1', {}, 0)).toEqual({ kind: 'choose', choice: 'once' }) + expect(approvalAction('2', {}, 0)).toEqual({ kind: 'choose', choice: 'session' }) + expect(approvalAction('3', {}, 0)).toEqual({ kind: 'choose', choice: 'always' }) + expect(approvalAction('4', {}, 0)).toEqual({ kind: 'choose', choice: 'deny' }) + }) + + it('ignores out-of-range numbers', () => { + expect(approvalAction('0', {}, 1)).toEqual({ kind: 'noop' }) + expect(approvalAction('5', {}, 1)).toEqual({ kind: 'noop' }) + expect(approvalAction('9', {}, 1)).toEqual({ kind: 'noop' }) + }) + + it('confirms the current selection on Enter', () => { + expect(approvalAction('', { return: true }, 0)).toEqual({ kind: 'choose', choice: 'once' }) + expect(approvalAction('', { return: true }, 3)).toEqual({ kind: 'choose', choice: 'deny' }) + }) + + it('moves selection up/down 
within bounds', () => { + expect(approvalAction('', { upArrow: true }, 2)).toEqual({ kind: 'move', delta: -1 }) + expect(approvalAction('', { downArrow: true }, 1)).toEqual({ kind: 'move', delta: 1 }) + }) + + it('clamps selection movement at the edges', () => { + expect(approvalAction('', { upArrow: true }, 0)).toEqual({ kind: 'noop' }) + expect(approvalAction('', { downArrow: true }, 3)).toEqual({ kind: 'noop' }) + }) + + it('Esc beats numeric/return — denying is always the first interpretation', () => { + // If a terminal somehow delivers Esc + a digit in the same event, deny + // wins. Documents the precedence so a future refactor doesn't flip it. + expect(approvalAction('1', { escape: true }, 0)).toEqual({ kind: 'choose', choice: 'deny' }) + expect(approvalAction('', { escape: true, return: true }, 1)).toEqual({ kind: 'choose', choice: 'deny' }) + }) + + it('returns noop for unrelated keystrokes (printable letters etc.)', () => { + expect(approvalAction('a', {}, 0)).toEqual({ kind: 'noop' }) + expect(approvalAction(' ', {}, 0)).toEqual({ kind: 'noop' }) + }) +}) diff --git a/ui-tui/src/__tests__/useInputHandlers.test.ts b/ui-tui/src/__tests__/useInputHandlers.test.ts index 066292abfa5..0d3fd69c1ed 100644 --- a/ui-tui/src/__tests__/useInputHandlers.test.ts +++ b/ui-tui/src/__tests__/useInputHandlers.test.ts @@ -1,6 +1,46 @@ import { describe, expect, it, vi } from 'vitest' -import { applyVoiceRecordResponse } from '../app/useInputHandlers.js' +import { applyVoiceRecordResponse, shouldFallThroughForScroll } from '../app/useInputHandlers.js' + +const baseKey = { + downArrow: false, + pageDown: false, + pageUp: false, + shift: false, + upArrow: false, + wheelDown: false, + wheelUp: false +} + +describe('shouldFallThroughForScroll — keep transcript scrolling alive during prompt overlays', () => { + it('falls through for wheel scrolls', () => { + expect(shouldFallThroughForScroll({ ...baseKey, wheelUp: true })).toBe(true) + expect(shouldFallThroughForScroll({ 
...baseKey, wheelDown: true })).toBe(true) + }) + + it('falls through for PageUp / PageDown', () => { + expect(shouldFallThroughForScroll({ ...baseKey, pageUp: true })).toBe(true) + expect(shouldFallThroughForScroll({ ...baseKey, pageDown: true })).toBe(true) + }) + + it('falls through for Shift+ArrowUp / Shift+ArrowDown', () => { + expect(shouldFallThroughForScroll({ ...baseKey, shift: true, upArrow: true })).toBe(true) + expect(shouldFallThroughForScroll({ ...baseKey, shift: true, downArrow: true })).toBe(true) + }) + + it('does NOT fall through for plain arrows — those drive in-prompt selection', () => { + expect(shouldFallThroughForScroll({ ...baseKey, upArrow: true })).toBe(false) + expect(shouldFallThroughForScroll({ ...baseKey, downArrow: true })).toBe(false) + }) + + it('does NOT fall through for plain Shift — without an arrow it is a no-op', () => { + expect(shouldFallThroughForScroll({ ...baseKey, shift: true })).toBe(false) + }) + + it('does NOT fall through for unrelated state (no scroll keys held)', () => { + expect(shouldFallThroughForScroll(baseKey)).toBe(false) + }) +}) describe('applyVoiceRecordResponse', () => { it('reverts optimistic REC state when the gateway reports voice busy', () => { diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index ce25af70edd..59de48a310d 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -23,6 +23,42 @@ import { getUiState } from './uiStore.js' const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target +/** + * Approval / clarify / confirm overlays mount their own `useInput` handlers + * for the in-prompt keys (arrows, numbers, Enter, sometimes Esc). 
The global + * input handler used to early-return for any other key while one of those + * overlays was up, which silently disabled transcript scrolling — the user + * couldn't read context above the prompt that the prompt itself was asking + * about. Returns true when the key is a transcript-scroll input that should + * fall through to the global scroll handlers even while a prompt is active. + * + * Modifier-held wheel (precision mode) is included — a user who wants to + * scroll a single line at a time during a prompt expects it to work. + */ +export function shouldFallThroughForScroll(key: { + downArrow: boolean + pageDown: boolean + pageUp: boolean + shift: boolean + upArrow: boolean + wheelDown: boolean + wheelUp: boolean +}): boolean { + if (key.wheelUp || key.wheelDown) { + return true + } + + if (key.pageUp || key.pageDown) { + return true + } + + if (key.shift && (key.upArrow || key.downArrow)) { + return true + } + + return false +} + export function applyVoiceRecordResponse( response: null | VoiceRecordResponse, starting: boolean, @@ -224,7 +260,18 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { // handlers must receive keystrokes (arrow keys, numbers, Enter). Only // intercept Ctrl+C here so the user can deny/dismiss — all other keys // fall through to the component-level handlers. - if (overlay.approval || overlay.clarify || overlay.confirm) { + // + // Scroll inputs (wheel / PageUp / PageDown / Shift+↑↓) are special: + // they must reach the transcript scroll handlers below even with a + // prompt up. Long-thread context the prompt is asking about often + // lives above the visible viewport, and being unable to read it while + // answering felt like the prompt had locked the entire UI. Explicitly + // skip the prompt-overlay early-return for scroll keys so they fall + // through to the wheel / PageUp / Shift+arrow handlers below. 
+ const promptOverlay = overlay.approval || overlay.clarify || overlay.confirm + const fallThroughForScroll = promptOverlay && shouldFallThroughForScroll(key) + + if (promptOverlay && !fallThroughForScroll) { if (isCtrl(key, ch, 'c')) { cancelOverlayFromCtrlC() } @@ -298,7 +345,13 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { patchOverlayState({ picker: false }) } - return + // When a prompt overlay is up and the user pressed a scroll key, fall + // through to the global scroll handlers below instead of returning. + // Otherwise nothing above this comment matched, and there's nothing + // useful to do for an arbitrary key while blocked. + if (!fallThroughForScroll) { + return + } } if (cState.completions.length && cState.input && cState.historyIdx === null && (key.upArrow || key.downArrow)) { diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx index e9d42485d9b..3dfd31be869 100644 --- a/ui-tui/src/components/prompts.tsx +++ b/ui-tui/src/components/prompts.tsx @@ -11,28 +11,65 @@ const OPTS = ['once', 'session', 'always', 'deny'] as const const LABELS = { always: 'Always allow', deny: 'Deny', once: 'Allow once', session: 'Allow this session' } as const const CMD_PREVIEW_LINES = 10 +type ApprovalKey = { + downArrow?: boolean + escape?: boolean + return?: boolean + upArrow?: boolean +} + +type ApprovalAction = + | { kind: 'choose'; choice: (typeof OPTS)[number] } + | { kind: 'move'; delta: -1 | 1 } + | { kind: 'noop' } + +/** + * Pure key-dispatch for the approval prompt — exported so the regression + * matrix (Esc, Ctrl+C-equivalent, number keys, Enter, ↑↓) is testable + * without mounting React + Ink + a fake stdin. The component just maps the + * action onto its own state setters. + * + * Esc and number keys both terminate the prompt; Esc maps to deny (parity + * with the global Ctrl+C handler that already calls cancelOverlayFromCtrlC + * for approvals). 
Numbers 1..OPTS.length pick the labelled choice. Enter + * confirms the current selection. ↑/↓ moves the selection within bounds. + */ +export function approvalAction(ch: string, key: ApprovalKey, sel: number): ApprovalAction { + if (key.escape) { + return { kind: 'choose', choice: 'deny' } + } + + const n = parseInt(ch, 10) + + if (n >= 1 && n <= OPTS.length) { + return { kind: 'choose', choice: OPTS[n - 1]! } + } + + if (key.return) { + return { kind: 'choose', choice: OPTS[sel]! } + } + + if (key.upArrow && sel > 0) { + return { kind: 'move', delta: -1 } + } + + if (key.downArrow && sel < OPTS.length - 1) { + return { kind: 'move', delta: 1 } + } + + return { kind: 'noop' } +} + export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) { const [sel, setSel] = useState(0) useInput((ch, key) => { - if (key.upArrow && sel > 0) { - setSel(s => s - 1) - } + const action = approvalAction(ch, key, sel) - if (key.downArrow && sel < OPTS.length - 1) { - setSel(s => s + 1) - } - - const n = parseInt(ch, 10) - - if (n >= 1 && n <= OPTS.length) { - onChoice(OPTS[n - 1]!) - - return - } - - if (key.return) { - onChoice(OPTS[sel]!) + if (action.kind === 'choose') { + onChoice(action.choice) + } else if (action.kind === 'move') { + setSel(s => s + action.delta) } }) @@ -71,7 +108,7 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) { ))} - ↑/↓ select · Enter confirm · 1-4 quick pick · Ctrl+C deny + ↑/↓ select · Enter confirm · 1-4 quick pick · Esc/Ctrl+C deny ) } From a31191c3f57e2463ce4253cb1d95f93c52f3df14 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 20:29:20 -0700 Subject: [PATCH 103/218] fix(docs): unique sidebar keys for duplicate skill categories (#26726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-skill sidebar tree from PR #26646 emitted category entries with only a label. 
Docusaurus derives translation keys from the label (sidebar.docs.category.