docs: publish llms.txt and llms-full.txt for agent-friendly ingestion (#18276)

Two machine-readable entry points to the Hermes Agent docs: (1) /llms.txt — a curated index of every doc page, one link per page with a short description; ~17 KB, safe to load into an LLM context window. (2) /llms-full.txt — every page under website/docs/ concatenated as markdown; ~1.8 MB, intended for one-shot ingestion by coding agents and RAG pipelines. Both files are also served from /docs/llms.txt and /docs/llms-full.txt (Docusaurus serves website/static/ under baseUrl=/docs/). Some agents and IDE plugins probe the classic site-root path; the deploy workflow now copies both files to the _site root so either URL works. Conforms to the emerging llmstxt.org spec: H1 project name, blockquote summary, short install command, GitHub link, then curated sections mirroring the docs-site navigation (Getting Started, Using Hermes, Features, Messaging, Integrations, Guides, Developer Guide, Reference). Generated by website/scripts/generate-llms-txt.py. Wired into prebuild.mjs so every 'npm run build' and 'npm run start' refreshes the files alongside the existing skills.json extraction. Both outputs are gitignored (same precedent as src/data/skills.json). Descriptions in llms.txt are pulled from each page's frontmatter, so they stay current automatically. All ~80 section slugs are validated against the filesystem at generation time; an invalid slug makes the generator fail, which prebuild.mjs reports as a warning (the llms.txt step is non-fatal there, so the build itself continues).
2026-05-03 02:11:48 +00:00 · 2026-04-30 23:17:14 -07:00 · 2026-04-30 23:17:14 -07:00 · c6eebfc25a
commit c6eebfc25a
parent cf2b2d31ce
5 changed files with 367 additions and 22 deletions
--- a/website/scripts/prebuild.mjs
+++ b/website/scripts/prebuild.mjs
@ -1,14 +1,18 @@
 #!/usr/bin/env node
-// Runs website/scripts/extract-skills.py before docusaurus build/start so
-// that website/src/data/skills.json (imported by src/pages/skills/index.tsx)
-// exists without contributors needing to remember to run the Python script
-// manually. CI workflows still run the extraction explicitly, which is a
-// no-op duplicate but matches their historical behaviour.
+// Runs website/scripts/extract-skills.py and generate-llms-txt.py before
+// docusaurus build/start so that:
+//   - website/src/data/skills.json (imported by src/pages/skills/index.tsx)
+//   - website/static/llms.txt (agent-friendly short docs index)
+//   - website/static/llms-full.txt (full docs concat for LLM context)
+// all exist without contributors remembering to run Python scripts manually.
+// CI workflows still run the extraction explicitly, which is a no-op duplicate
+// but matches their historical behaviour.
 //
 // If python3 or its deps (pyyaml) aren't available on the local machine, we
 // fall back to writing an empty skills.json so `npm run build` still
-// succeeds — the Skills Hub page just shows an empty state. CI always has
-// the deps installed, so production deploys get real data.
+// succeeds — the Skills Hub page just shows an empty state, and llms.txt
+// generation is skipped. CI always has the deps installed, so production
+// deploys get real data.

 import { spawnSync } from "node:child_process";
 import { mkdirSync, writeFileSync, existsSync } from "node:fs";
@ -18,6 +22,7 @@ import { fileURLToPath } from "node:url";
 const scriptDir = dirname(fileURLToPath(import.meta.url));
 const websiteDir = resolve(scriptDir, "..");
 const extractScript = join(scriptDir, "extract-skills.py");
+const llmsScript = join(scriptDir, "generate-llms-txt.py");
 const outputFile = join(websiteDir, "src", "data", "skills.json");

 function writeEmptyFallback(reason) {
@ -29,22 +34,37 @@ function writeEmptyFallback(reason) {
  );
 }

+/**
+ * Runs a Python helper script with `python3`, using the website directory as
+ * the working directory and inheriting stdio so the script's output is visible.
+ * Every failure is reported via console.warn and never thrown, so callers can
+ * treat the step as best-effort.
+ *
+ * @param {string} script - Path of the Python script to run.
+ * @param {string} label - Name used in the `[prebuild]` log messages.
+ * @returns {boolean} true only when the script ran and exited with status 0.
+ */
+function runPython(script, label) {
+  if (!existsSync(script)) {
+    console.warn(`[prebuild] ${label} skipped (script missing)`);
+    return false;
+  }
+  const r = spawnSync("python3", [script], { stdio: "inherit", cwd: websiteDir });
+  if (r.error) {
+    // The process could not be started (or was aborted by Node); status is
+    // null in that case, so report the spawn error rather than "status null".
+    const why =
+      r.error.code === "ENOENT"
+        ? "python3 not found"
+        : `could not run python3: ${r.error.message}`;
+    console.warn(`[prebuild] ${label} skipped (${why})`);
+    return false;
+  }
+  if (r.signal) {
+    // A child killed by a signal also has a null status.
+    console.warn(`[prebuild] ${label} terminated by signal ${r.signal}`);
+    return false;
+  }
+  if (r.status !== 0) {
+    console.warn(`[prebuild] ${label} exited with status ${r.status}`);
+    return false;
+  }
+  return true;
+}
+
+// 1) skills.json — required for the Skills Hub page. Every failure path below calls writeEmptyFallback instead of exiting, so step 2 still runs.
 if (!existsSync(extractScript)) {
  writeEmptyFallback("extract script missing");
-  process.exit(0);
+} else {
+  const r = spawnSync("python3", [extractScript], {
+    stdio: "inherit", // let the Python script's output show up in the build log
+    cwd: websiteDir,
+  });
+  if (r.error && r.error.code === "ENOENT") { // python3 executable could not be found
+    writeEmptyFallback("python3 not found");
+  } else if (r.status !== 0) { // also taken when status is null (other spawn error, or child killed by a signal)
+    writeEmptyFallback(`extract-skills.py exited with status ${r.status}`);
+  }
 }

-const result = spawnSync("python3", [extractScript], {
-  stdio: "inherit",
-  cwd: websiteDir,
-});
-
-if (result.error && result.error.code === "ENOENT") {
-  writeEmptyFallback("python3 not found");
-  process.exit(0);
-}
-
-if (result.status !== 0) {
-  writeEmptyFallback(`extract-skills.py exited with status ${result.status}`);
-  process.exit(0);
-}
+// 2) llms.txt + llms-full.txt — agent-friendly docs entrypoints. Non-fatal: runPython only warns on failure and its return value is ignored here.
+runPython(llmsScript, "generate-llms-txt.py");