mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-31 06:51:29 +00:00
perf(skills-page): lazy-fetch the catalog instead of bundling 34MB into JS (#33809)
PR #33748 grew the live skills index from ~2k skills to ~69k, which made the previous build-time bundling strategy untenable: the skills page's JS chunk was about to balloon from ~1MB to ~35MB. Initial page load on mobile became unusable, search lagged on every keystroke against the 68k-item array, and JSON.parse blocked the main thread at startup.

Three changes:

1. extract-skills.py writes skills.json + skills-meta.json into website/static/api/ instead of website/src/data/. Static-served by Vercel as /docs/api/skills.json (gzipped on the wire), same CDN that already serves skills-index.json.
2. skills/index.tsx drops the static import and fetches both files in parallel on mount. Loading state shows '…' for the count; failures surface a small error pill instead of blanking the page.
3. Search is debounced 150ms and runs against a precomputed lowercase haystack stamped onto each row at load time. Before: array-join + toLowerCase per row per keystroke on a 68k array. After: single .includes() per row, deferred until typing settles.

Validation:

| | before | after |
|---|---|---|
| skills.json location | src/data/ (bundled) | static/api/ (CDN) |
| Largest JS chunk | would be ~35MB at 68k skills | 659 KB |
| Initial page render | wait for full parse | immediate, fetch async |
| Per-keystroke filter | join+lowercase x 68k rows | single includes x 68k rows |
| Debounce | none | 150ms |

Built locally for both en and zh-Hans locales; the 34MB skills.json now lives in build/api/ and is served separately rather than inlined into the page's bundle.

skills.json and skills-meta.json added to .gitignore — they were already build artifacts, but the gitignore only listed skills-index.json before.
This commit is contained in:
parent
6f9182cb34
commit
a1eaad2fc0
4 changed files with 125 additions and 33 deletions
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Extract skill metadata into website/src/data/skills.json for the Skills Hub page.
|
||||
"""Extract skill metadata into website/static/api/skills.json for the Skills Hub page.
|
||||
|
||||
Two data sources:
|
||||
|
||||
|
|
@ -32,8 +32,12 @@ LOCAL_SKILL_DIRS = [
|
|||
]
|
||||
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
|
||||
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
|
||||
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
|
||||
META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
|
||||
# Output to static/api/ so the file is CDN-served at /docs/api/skills.json
|
||||
# rather than bundled into the page's JS chunk. At 50k+ skills the
|
||||
# bundled payload was ~26 MB; lazy-fetch keeps the initial page load
|
||||
# fast and shrinks the JS chunk back to a few hundred KB.
|
||||
OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills.json")
|
||||
META_OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills-meta.json")
|
||||
|
||||
CATEGORY_LABELS = {
|
||||
"apple": "Apple",
|
||||
|
|
@ -531,7 +535,9 @@ def main():
|
|||
|
||||
os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
|
||||
with open(OUTPUT, "w", encoding="utf-8") as f:
|
||||
json.dump(all_skills, f, indent=2)
|
||||
# Minified — file is served over the wire, not read by humans.
|
||||
# At 50k+ skills the indented version was ~30% larger.
|
||||
json.dump(all_skills, f, separators=(",", ":"), ensure_ascii=False)
|
||||
|
||||
# Sidecar meta file so the page can render a "Last refreshed" badge
|
||||
# without changing the shape of skills.json.
|
||||
|
|
@ -547,7 +553,7 @@ def main():
|
|||
if index_meta:
|
||||
meta.update(index_meta)
|
||||
with open(META_OUTPUT, "w", encoding="utf-8") as f:
|
||||
json.dump(meta, f, indent=2)
|
||||
json.dump(meta, f, separators=(",", ":"), ensure_ascii=False)
|
||||
|
||||
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
|
||||
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env node
|
||||
// Runs website/scripts/extract-skills.py and generate-llms-txt.py before
|
||||
// docusaurus build/start so that:
|
||||
// - website/src/data/skills.json (imported by src/pages/skills/index.tsx)
|
||||
// - website/static/api/skills.json (lazy-fetched by src/pages/skills/index.tsx)
|
||||
// - website/static/api/skills-meta.json (sidecar metadata for the Skills Hub)
|
||||
// - website/static/llms.txt (agent-friendly short docs index)
|
||||
// - website/static/llms-full.txt (full docs concat for LLM context)
|
||||
// all exist without contributors remembering to run Python scripts manually.
|
||||
|
|
@ -30,7 +31,7 @@ const scriptDir = dirname(fileURLToPath(import.meta.url));
|
|||
const websiteDir = resolve(scriptDir, "..");
|
||||
const extractScript = join(scriptDir, "extract-skills.py");
|
||||
const llmsScript = join(scriptDir, "generate-llms-txt.py");
|
||||
const outputFile = join(websiteDir, "src", "data", "skills.json");
|
||||
const outputFile = join(websiteDir, "static", "api", "skills.json");
|
||||
const unifiedIndexFile = join(websiteDir, "static", "api", "skills-index.json");
|
||||
const UNIFIED_INDEX_URL =
|
||||
"https://hermes-agent.nousresearch.com/docs/api/skills-index.json";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue