From 5671461c0c58050822284dc6ae32adaca09f6e24 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 25 May 2026 14:48:53 -0700 Subject: [PATCH] =?UTF-8?q?feat(skills):=20add=20code-wiki=20skill=20?= =?UTF-8?q?=E2=80=94=20closes=20#486=20(#32240)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(skills): add code-wiki skill — closes #486 Bundled skill at skills/software-development/code-wiki/ that generates comprehensive documentation for any codebase: project overview, architecture walkthrough with Mermaid flowchart, per-module deep-dives, class diagram, sequence diagrams, getting-started guide, and (when applicable) API reference. Output defaults to ~/.hermes/wikis/<repo-name>/ (external to repo, like Google CodeWiki); in-repo output supported when user explicitly requests it. Uses only existing Hermes tools (terminal, read_file, search_files, write_file) — no Docker, no external services, no extra dependencies. Works on local repos and GitHub URLs (shallow-clones to a temp dir). Bounded scope defaults (depth 3, cap 10 modules) keep token cost reasonable on large repos. * refactor(skills): move code-wiki to optional-skills Per the 'when in doubt, optional' rule — wiki generation is an 'I want this big thing right now' capability, not daily-driver behavior. Lines up with finance/research/blockchain skills as install-on-demand rather than always loaded. 
Install via: hermes skills install official/software-development/code-wiki --- .../software-development/code-wiki/SKILL.md | 445 +++++++++++++++++ .../code-wiki/templates/README.md | 31 ++ .../code-wiki/templates/architecture.md | 30 ++ .../code-wiki/templates/getting-started.md | 47 ++ .../code-wiki/templates/module.md | 38 ++ .../docs/reference/optional-skills-catalog.md | 1 + .../software-development-code-wiki.md | 463 ++++++++++++++++++ website/sidebars.ts | 1 + 8 files changed, 1056 insertions(+) create mode 100644 optional-skills/software-development/code-wiki/SKILL.md create mode 100644 optional-skills/software-development/code-wiki/templates/README.md create mode 100644 optional-skills/software-development/code-wiki/templates/architecture.md create mode 100644 optional-skills/software-development/code-wiki/templates/getting-started.md create mode 100644 optional-skills/software-development/code-wiki/templates/module.md create mode 100644 website/docs/user-guide/skills/optional/software-development/software-development-code-wiki.md diff --git a/optional-skills/software-development/code-wiki/SKILL.md b/optional-skills/software-development/code-wiki/SKILL.md new file mode 100644 index 00000000000..93fde8a3d58 --- /dev/null +++ b/optional-skills/software-development/code-wiki/SKILL.md @@ -0,0 +1,445 @@ +--- +name: code-wiki +description: "Generate wiki docs + Mermaid diagrams for any codebase." +version: 0.1.0 +author: Teknium (teknium1), Hermes Agent +license: MIT +platforms: [linux, macos, windows] +metadata: + hermes: + tags: [Documentation, Mermaid, Architecture, Diagrams, Wiki, Code-Analysis] + related_skills: [codebase-inspection, github-repo-management] +--- + +# Code Wiki Skill + +Generate a comprehensive wiki for any codebase — overview, architecture, per-module deep-dives, Mermaid class and sequence diagrams. Inspired by Google CodeWiki, but works on local repos, private repos, and any language. 
Uses only existing Hermes tools (`terminal`, `read_file`, `search_files`, `write_file`); no Docker, no external services, no extra dependencies. + +This skill produces **reference documentation** (what/how). It does not produce strategic narrative (why — that's a different skill). + +## When to Use + +- User says "document this codebase", "generate a wiki", "make architecture diagrams" +- User is onboarding to an unfamiliar repo and wants a structured reference +- User points at a GitHub URL and asks for documentation +- User needs a stable artifact (markdown + Mermaid) that renders on GitHub + +Do NOT use this for: +- Single-file or single-function documentation — just answer directly +- API reference for one specific endpoint — use `read_file` and answer inline +- Strategic "why does this exist" narrative — different skill, different purpose +- Codebases the user is actively developing in this session — just answer questions as they come + +## Prerequisites + +- No env vars required. +- `git` on PATH for repo SHA tracking and remote clones. +- Optional: `pygount` for language-breakdown stats (see the `codebase-inspection` skill). + +## How to Run + +Run the shell steps below through the `terminal` tool from the target repo's root, then use `read_file` / `search_files` / `write_file` to produce the wiki. Default output location is `~/.hermes/wikis/<repo-name>/`. Only write into the repo (`docs/wiki/`) when the user explicitly requests it. 
+ +## Quick Reference + +| Step | Action | +|---|---| +| 1 | Resolve target — local cwd, given path, or `git clone --depth 50 <repo-url>` to a temp dir | +| 2 | Scan structure — `ls`, `find -maxdepth 3`, manifest files, README | +| 3 | Pick 8–10 modules to document | +| 4 | Write `README.md` (overview + module map) | +| 5 | Write `architecture.md` with Mermaid flowchart | +| 6 | Write per-module docs in `modules/` | +| 7 | Write `diagrams/class-diagram.md` (Mermaid classDiagram) | +| 8 | Write `diagrams/sequences.md` (Mermaid sequenceDiagram, 2–4 workflows) | +| 9 | Write `getting-started.md` | +| 10 | Write `api.md` if applicable, else skip | +| 11 | Write `.codewiki-state.json` | +| 12 | Report paths to user | + +## Procedure + +### 1. Resolve the target + +For a GitHub URL: + +```bash +WIKI_TMP=$(mktemp -d) +git clone --depth 50 <repo-url> "$WIKI_TMP/repo" +cd "$WIKI_TMP/repo" +REPO_SHA=$(git rev-parse HEAD) +REPO_NAME=$(basename <repo-url> .git) +``` + +For a local path (or cwd if none given): + +```bash +cd <path> +REPO_SHA=$(git rev-parse HEAD 2>/dev/null || echo "uncommitted") +REPO_NAME=$(basename "$PWD") +``` + +Then set the output dir: + +```bash +OUTPUT_DIR="$HOME/.hermes/wikis/$REPO_NAME" +mkdir -p "$OUTPUT_DIR/modules" "$OUTPUT_DIR/diagrams" +``` + +### 2. Scan repo structure + +Use the `terminal` tool for the shell work, `read_file` for manifests: + +```bash +# Shallow tree first +ls -la + +# Deeper tree, noise filtered +find . -type d \ + -not -path '*/\.*' \ + -not -path '*/node_modules*' \ + -not -path '*/venv*' \ + -not -path '*/__pycache__*' \ + -not -path '*/dist*' \ + -not -path '*/build*' \ + -not -path '*/target*' \ + -maxdepth 3 | sort + +# Language breakdown (skip if pygount unavailable) +pygount --format=summary \ + --folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,target" \ + . 
2>/dev/null || true +``` + +Then `read_file` the relevant manifests (`package.json`, `pyproject.toml`, `setup.py`, `Cargo.toml`, `go.mod`, `pom.xml`, `build.gradle`) and the project README. Use `search_files target='files'` to find them rather than guessing names. + +### 3. Pick modules to document + +Cap initial pass at **8–10 modules**. Heuristics by language: + +- Python: top-level packages (dirs with `__init__.py`), plus subsystem dirs +- JS/TS: `src/`, top-level workspace dirs +- Rust: each crate in a workspace, or top-level `src/` dirs +- Go: each top-level package directory +- Mixed/unfamiliar: top-level directories that contain source code (not config, not tests) + +For very large repos, prioritize by: +1. Imported-from count (a module imported by many is core) +2. LOC (bigger modules usually warrant their own doc) +3. Mentions in README / top-level docs + +State the module list to the user before generating per-module docs on big repos — gives them a chance to redirect. + +### 4. Write `README.md` + +`read_file` the actual project README plus the top 2–3 entry-point files. Then `write_file`: + +````markdown +# <Project Name> + +<one-paragraph description> + +## Key Concepts + +- **<concept 1>** — <one line> +- **<concept 2>** — <one line> + +## Entry Points + +- [`path/to/main.py`](<link>) — <what it does> +- [`path/to/cli.py`](<link>) — <what it does> + +## High-Level Architecture + +<2-3 sentences. Detail goes in architecture.md.> + +See [architecture.md](architecture.md). + +## Module Map + +| Module | Purpose | +|---|---| +| [`<module>`](modules/<module>.md) | <one-line purpose> | + +## Getting Started + +See [getting-started.md](getting-started.md). +```` + +For link targets in local mode use relative paths. For cloned repos use `https://github.com/<owner>/<repo>/blob/<sha>/<path>` so links survive future commits. + +### 5. Write `architecture.md` + +````markdown +# Architecture + +<2-3 paragraphs: shape of the system. What talks to what. Where data enters, +where it exits, where state lives.> + +## Components + +- **<component>** — <1-2 sentences>. See [`modules/<module>.md`](modules/<module>.md). 
+ +## System Diagram + +```mermaid +flowchart TD + User([User]) --> Entry[Entry Point] + Entry --> Core[Core Engine] + Core --> StorageA[(Database)] + Core --> ExternalAPI{{External API}} +``` + +## Data Flow + +1. **<step 1>** — [`<file>`](<link>) +2. **<step 2>** — [`<file>`](<link>) + +## Key Design Decisions + +- <decision> +```` + +**Mermaid shape semantics:** +- `[]` = component +- `[()]` = database / storage +- `{{}}` = external service +- `([])` = entry point or terminal +- `-->` = sync call, `-.->` = async/event + +Cap at ~20 nodes per diagram. Split into sub-diagrams if larger. + +### 6. Write per-module docs in `modules/` + +For each selected module, inspect its layout with `ls`, identify 3–5 most important files (by size, by being named `core.py` / `main.py` / `__init__.py`, by being imported a lot), then `read_file` those files (use `offset` / `limit` to read only what you need; prefer `search_files` for specific symbols). + +````markdown +# Module: `<module-name>` + +<1-2 sentence purpose.> + +## Responsibilities + +- <responsibility> +- <responsibility> + +## Key Files + +- [`<module>/<file>`](<link>) — <what it does> + +## Public API + +<public functions/classes, each with signature and one-line description> + +## Internal Structure + +<how the module is organized> + +## Dependencies + +- **Used by:** <other modules> +- **Uses:** <other modules and libraries> + +## Notable Patterns / Gotchas + +- <anything non-obvious> +```` + +### 7. Write `diagrams/class-diagram.md` + +Pick the 5–10 most important classes/types. `read_file` them, then write: + +````markdown +# Class Diagram + +## Core Types + +```mermaid +classDiagram + class Agent { + +string name + +list~Tool~ tools + +chat(message) string + } + class Tool { + <<interface>> + +name string + +execute(args) any + } + Agent --> Tool : uses + Tool <|-- TerminalTool + Tool <|-- WebTool +``` + +## Notes + +<notes on relationships the diagram does not show> +```` + +For languages without classes (Go, C, Rust): use the diagram for struct relationships, or skip class-diagram.md and explain it in prose in architecture.md. Don't force-fit. + +### 8. Write `diagrams/sequences.md` + +Pick 2–4 of the most important workflows. 
Trace each call path through the code (read entry point, follow function calls), then: + +````markdown +# Sequence Diagrams + +## Workflow: <workflow name> + +<1 sentence describing what this does and when it runs.> + +```mermaid +sequenceDiagram + participant User + participant CLI + participant Agent + participant LLM + User->>CLI: types message + CLI->>Agent: chat(message) + Agent->>LLM: API call + LLM-->>Agent: response + tool_calls + Agent->>Agent: execute tools + Agent-->>CLI: final response +``` + +### Walkthrough + +1. **User input** — [`cli.py:HermesCLI.run_session`](<link>) +2. **Message dispatch** — [`run_agent.py:AIAgent.chat`](<link>) +```` + +Don't invent participants. Every box must correspond to a real component the reader can find in the code. + +### 9. Write `getting-started.md` + +````markdown +# Getting Started + +## Prerequisites + +<language runtime version, required dependencies> + +## Installation + +```bash +<install commands> +``` + +## First Run + +```bash +<first run command> +``` + +## Common Workflows + +### <workflow name> +<commands or steps> + +## Configuration + +- `<config file>` — <what it controls> +- Env var `<VAR>` — <what it controls> + +## Where to Go Next + +- Architecture: [architecture.md](architecture.md) +- Module reference: [README.md#module-map](README.md#module-map) +```` + +### 10. Write `api.md` (skip if not applicable) + +Only write this if the project is a library or API server. If it is: + +- Find the public API surface (`__init__.py` exports, OpenAPI specs, route handlers, exported types) +- Document each public entry with signature, parameters, return type, one-line description +- Group by category + +### 11. 
Write the state file + +```bash +cat > "$OUTPUT_DIR/.codewiki-state.json" <<EOF +{ + "repo": "$REPO_NAME", + "source_sha": "$REPO_SHA", + "modules": ["<module-1>", "<module-2>"] +} +EOF +``` + +### 12. Report paths to the user + +``` +Wiki generated at ~/.hermes/wikis/<repo-name>/: + README.md project overview, module map + architecture.md system architecture + flowchart + getting-started.md setup, first run, workflows + modules/ per-module deep-dives + api.md API reference (only if written in Step 10) + diagrams/class-diagram.md Mermaid class diagram + diagrams/sequences.md Mermaid sequence diagrams +``` + +If you cloned to a temp dir, remind the user it can be removed (`rm -rf "$WIKI_TMP"`) after they've reviewed the wiki. + +## Scope Control + +Generating a full wiki for a 500K-LOC monorepo is wildly token-expensive. Default to bounded scope: + +- Initial scan: max depth 3 directories +- Per-module docs: cap at 10 modules unless user expands scope +- Per-file reads: prefer `search_files` for symbols + `read_file` with `offset`/`limit` over full reads +- Skip vendored code (`vendor/`, `third_party/`, generated code, `_pb2.py`, `.min.js`) + +If the user says "do the whole thing exhaustively", believe them — but ballpark the cost first: "this repo has ~340 source files, comprehensive coverage will be expensive — confirm?" + +## Re-Run / Update + +If `.codewiki-state.json` already exists at the target path: + +- Read it for previous SHA and module list +- If source SHA matches: ask user if they want to regenerate or skip +- If SHA differs: offer to regenerate only modules with changed files (`git diff --name-only <previous-sha> HEAD`) + +Full incremental-regeneration is a future enhancement — for now, regenerating the whole thing is acceptable. + +## Pitfalls + +- **Fabricating components.** Every diagram node and claimed function call must be in the source. `read_file` before writing. The single biggest failure mode for auto-generated docs is plausible-sounding fabrication. +- **Generic AI prose.** "This module is responsible for..." is content-free. Say what the module actually does in domain-specific terms. 
+- **Restating code as prose.** A module doc that says "the `process` function processes things by calling `process_item` on each item" is worse than just linking to the function. +- **Mermaid diagrams with more than 50 nodes.** They don't render legibly. Split them. +- **Documenting tests, generated code, or vendored deps as if they were product code.** Skip them. +- **In-repo output without asking.** Default is `~/.hermes/wikis/`. Only write into the repo when the user explicitly requests it. +- **Mermaid special chars need quotes:** `A["Tool / Agent"]` not `A[Tool / Agent]`. `<br/>` for line breaks inside a node. +- **Nested code fences in SKILL.md.** When writing a markdown example that contains a Mermaid block, use 4-backtick outer fences so the 3-backtick inner ` ```mermaid ` doesn't close the outer. (This SKILL.md does it.) +- **classDiagram generics** render as `~T~` (e.g. `List~Tool~`), not `<T>`. +- **GitHub Mermaid theme is fixed** — don't include `%%{init: ...}%%` blocks; they're stripped on render. + +## Verification + +After writing, verify: + +1. **Mermaid blocks balance** — opens equal closes per file: + ```bash + for f in "$OUTPUT_DIR"/diagrams/*.md "$OUTPUT_DIR"/architecture.md; do + opens=$(grep -c '^```mermaid' "$f") + total=$(grep -c '^```' "$f") + echo "$f: $opens mermaid blocks, $total total fences (expect total = opens*2)" + done + ``` +2. **All expected files exist** — + ```bash + ls "$OUTPUT_DIR"/{README.md,architecture.md,getting-started.md,.codewiki-state.json} \ + "$OUTPUT_DIR"/modules/ "$OUTPUT_DIR"/diagrams/ + ``` +3. **Module count matches what you intended** — `ls "$OUTPUT_DIR/modules" | wc -l` should equal the number of modules you committed to in Step 3. +4. **No fabricated paths** — sanity-check 2–3 source links resolve to real files. diff --git a/optional-skills/software-development/code-wiki/templates/README.md b/optional-skills/software-development/code-wiki/templates/README.md new file mode 100644 index 00000000000..2fe65cea2e2 --- /dev/null +++ b/optional-skills/software-development/code-wiki/templates/README.md @@ -0,0 +1,31 @@ +# {{PROJECT_NAME}} + +{{ONE_PARAGRAPH_DESCRIPTION}} + +## Key Concepts + +- **{{CONCEPT_1}}** — {{ONE_LINE}} +- **{{CONCEPT_2}}** — {{ONE_LINE}} +- **{{CONCEPT_3}}** — {{ONE_LINE}} + +## Entry Points + +- [`{{PATH_1}}`]({{LINK_1}}) — {{WHAT_IT_DOES}} +- [`{{PATH_2}}`]({{LINK_2}}) — {{WHAT_IT_DOES}} + +## High-Level Architecture + +{{TWO_TO_THREE_SENTENCES}} + +See [architecture.md](architecture.md) for the full picture. 
+ +## Module Map + +| Module | Purpose | +|---|---| +| [`{{MODULE_1}}`](modules/{{MODULE_1}}.md) | {{ONE_LINE_PURPOSE}} | +| [`{{MODULE_2}}`](modules/{{MODULE_2}}.md) | {{ONE_LINE_PURPOSE}} | + +## Getting Started + +See [getting-started.md](getting-started.md). diff --git a/optional-skills/software-development/code-wiki/templates/architecture.md b/optional-skills/software-development/code-wiki/templates/architecture.md new file mode 100644 index 00000000000..e737b2c9814 --- /dev/null +++ b/optional-skills/software-development/code-wiki/templates/architecture.md @@ -0,0 +1,30 @@ +# Architecture + +{{TWO_TO_THREE_PARAGRAPHS_SHAPE_OF_SYSTEM}} + +## Components + +- **{{COMPONENT_1}}** — {{ONE_TO_TWO_SENTENCES}} See [`modules/{{MODULE}}.md`](modules/{{MODULE}}.md). +- **{{COMPONENT_2}}** — {{ONE_TO_TWO_SENTENCES}} + +## System Diagram + +```mermaid +flowchart TD + User([User]) --> Entry[Entry Point] + Entry --> Core[Core Engine] + Core --> StorageA[(Database)] + Core --> ExternalAPI{{External API}} +``` + +## Data Flow + +1. **{{STEP_1}}** — [`{{FILE}}`]({{LINK}}) +2. **{{STEP_2}}** — [`{{FILE}}`]({{LINK}}) +3. **{{STEP_3}}** — [`{{FILE}}`]({{LINK}}) + +## Key Design Decisions + +- {{DECISION_1}} +- {{DECISION_2}} +- {{DECISION_3}} diff --git a/optional-skills/software-development/code-wiki/templates/getting-started.md b/optional-skills/software-development/code-wiki/templates/getting-started.md new file mode 100644 index 00000000000..bbc66dbbe0b --- /dev/null +++ b/optional-skills/software-development/code-wiki/templates/getting-started.md @@ -0,0 +1,47 @@ +# Getting Started + +## Prerequisites + +- {{LANGUAGE_RUNTIME_VERSION}} +- {{DEPENDENCY}} + +## Installation + +```bash +{{INSTALL_COMMANDS}} +``` + +## First Run + +```bash +{{FIRST_RUN_COMMAND}} +``` + +You should see {{EXPECTED_OUTPUT}}. 
+ +## Common Workflows + +### {{WORKFLOW_1}} + +```bash +{{COMMANDS}} +``` + +### {{WORKFLOW_2}} + +```bash +{{COMMANDS}} +``` + +## Configuration + +Key config files and settings: + +- `{{CONFIG_FILE}}` — {{WHAT_IT_CONTROLS}} +- Env var `{{VAR}}` — {{WHAT_IT_CONTROLS}} + +## Where to Go Next + +- Architecture overview: [architecture.md](architecture.md) +- Module reference: [README.md#module-map](README.md#module-map) +- Diagrams: [diagrams/](diagrams/) diff --git a/optional-skills/software-development/code-wiki/templates/module.md b/optional-skills/software-development/code-wiki/templates/module.md new file mode 100644 index 00000000000..8494438f5b4 --- /dev/null +++ b/optional-skills/software-development/code-wiki/templates/module.md @@ -0,0 +1,38 @@ +# Module: `{{MODULE_NAME}}` + +{{ONE_TO_TWO_SENTENCE_PURPOSE}} + +## Responsibilities + +- {{BULLET_1}} +- {{BULLET_2}} +- {{BULLET_3}} + +## Key Files + +- [`{{PATH_1}}`]({{LINK_1}}) — {{WHAT_IT_DOES}} +- [`{{PATH_2}}`]({{LINK_2}}) — {{WHAT_IT_DOES}} + +## Public API + +### `{{FUNCTION_NAME}}({{SIGNATURE}})` + +{{ONE_LINE_DESCRIPTION}} + +**Parameters:** +- `{{PARAM}}` ({{TYPE}}) — {{DESCRIPTION}} + +**Returns:** {{TYPE}} — {{DESCRIPTION}} + +## Internal Structure + +{{HOW_THE_MODULE_IS_ORGANIZED}} + +## Dependencies + +- **Used by:** {{OTHER_MODULES}} +- **Uses:** {{OTHER_MODULES_AND_LIBS}} + +## Notable Patterns / Gotchas + +- {{ANYTHING_NON_OBVIOUS}} diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 4671acdfa51..bd2c22eff3c 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -185,6 +185,7 @@ hermes skills uninstall | Skill | Description | |-------|-------------| +| [**code-wiki**](/user-guide/skills/optional/software-development/software-development-code-wiki) | Generate wiki docs + Mermaid diagrams for any codebase. 
| | [**rest-graphql-debug**](/user-guide/skills/optional/software-development/software-development-rest-graphql-debug) | Debug REST/GraphQL APIs: status codes, auth, schemas, repro. | ## web-development diff --git a/website/docs/user-guide/skills/optional/software-development/software-development-code-wiki.md b/website/docs/user-guide/skills/optional/software-development/software-development-code-wiki.md new file mode 100644 index 00000000000..7d41054deac --- /dev/null +++ b/website/docs/user-guide/skills/optional/software-development/software-development-code-wiki.md @@ -0,0 +1,463 @@ +--- +title: "Code Wiki — Generate wiki docs + Mermaid diagrams for any codebase" +sidebar_label: "Code Wiki" +description: "Generate wiki docs + Mermaid diagrams for any codebase" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Code Wiki + +Generate wiki docs + Mermaid diagrams for any codebase. + +## Skill metadata + +| | | +|---|---| +| Source | Optional — install with `hermes skills install official/software-development/code-wiki` | +| Path | `optional-skills/software-development/code-wiki` | +| Version | `0.1.0` | +| Author | Teknium (teknium1), Hermes Agent | +| License | MIT | +| Platforms | linux, macos, windows | +| Tags | `Documentation`, `Mermaid`, `Architecture`, `Diagrams`, `Wiki`, `Code-Analysis` | +| Related skills | [`codebase-inspection`](/docs/user-guide/skills/bundled/github/github-codebase-inspection), [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Code Wiki Skill + +Generate a comprehensive wiki for any codebase — overview, architecture, per-module deep-dives, Mermaid class and sequence diagrams. Inspired by Google CodeWiki, but works on local repos, private repos, and any language. Uses only existing Hermes tools (`terminal`, `read_file`, `search_files`, `write_file`); no Docker, no external services, no extra dependencies. + +This skill produces **reference documentation** (what/how). It does not produce strategic narrative (why — that's a different skill). + +## When to Use + +- User says "document this codebase", "generate a wiki", "make architecture diagrams" +- Onboarding to an unfamiliar repo and wants a structured reference +- User points at a GitHub URL and asks for documentation +- Need a stable artifact (markdown + Mermaid) that renders on GitHub + +Do NOT use this for: +- Single-file or single-function documentation — just answer directly +- API reference for one specific endpoint — use `read_file` and answer inline +- Strategic "why does this exist" narrative — different skill, different purpose +- Codebases the user is actively developing in this session — just answer questions as they come + +## Prerequisites + +- No env vars required. +- `git` on PATH for repo SHA tracking and remote clones. +- Optional: `pygount` for language-breakdown stats (see the `codebase-inspection` skill). + +## How to Run + +Invoke through the `terminal` tool from the target repo's root, then use `read_file` / `search_files` / `write_file` to produce the wiki. Default output location is `~/.hermes/wikis//`. Only write into the repo (`docs/wiki/`) when the user explicitly requests it. 
+ +## Quick Reference + +| Step | Action | +|---|---| +| 1 | Resolve target — local cwd, given path, or `git clone --depth 50 ` to a temp dir | +| 2 | Scan structure — `ls`, `find -maxdepth 3`, manifest files, README | +| 3 | Pick 8–10 modules to document | +| 4 | Write `README.md` (overview + module map) | +| 5 | Write `architecture.md` with Mermaid flowchart | +| 6 | Write per-module docs in `modules/` | +| 7 | Write `diagrams/class-diagram.md` (Mermaid classDiagram) | +| 8 | Write `diagrams/sequences.md` (Mermaid sequenceDiagram, 2–4 workflows) | +| 9 | Write `getting-started.md` | +| 10 | Write `api.md` if applicable, else skip | +| 11 | Write `.codewiki-state.json` | +| 12 | Report paths to user | + +## Procedure + +### 1. Resolve the target + +For a GitHub URL: + +```bash +WIKI_TMP=$(mktemp -d) +git clone --depth 50 "$WIKI_TMP/repo" +cd "$WIKI_TMP/repo" +REPO_SHA=$(git rev-parse HEAD) +REPO_NAME=$(basename .git) +``` + +For a local path (or cwd if none given): + +```bash +cd +REPO_SHA=$(git rev-parse HEAD 2>/dev/null || echo "uncommitted") +REPO_NAME=$(basename "$PWD") +``` + +Then set the output dir: + +```bash +OUTPUT_DIR="$HOME/.hermes/wikis/$REPO_NAME" +mkdir -p "$OUTPUT_DIR/modules" "$OUTPUT_DIR/diagrams" +``` + +### 2. Scan repo structure + +Use the `terminal` tool for the shell work, `read_file` for manifests: + +```bash +# Shallow tree first +ls -la + +# Deeper tree, noise filtered +find . -type d \ + -not -path '*/\.*' \ + -not -path '*/node_modules*' \ + -not -path '*/venv*' \ + -not -path '*/__pycache__*' \ + -not -path '*/dist*' \ + -not -path '*/build*' \ + -not -path '*/target*' \ + -maxdepth 3 | sort + +# Language breakdown (skip if pygount unavailable) +pygount --format=summary \ + --folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,target" \ + . 
2>/dev/null || true +``` + +Then `read_file` the relevant manifests (`package.json`, `pyproject.toml`, `setup.py`, `Cargo.toml`, `go.mod`, `pom.xml`, `build.gradle`) and the project README. Use `search_files target='files'` to find them rather than guessing names. + +### 3. Pick modules to document + +Cap initial pass at **8–10 modules**. Heuristics by language: + +- Python: top-level packages (dirs with `__init__.py`), plus subsystem dirs +- JS/TS: `src/`, top-level workspace dirs +- Rust: each crate in a workspace, or top-level `src/` dirs +- Go: each top-level package directory +- Mixed/unfamiliar: top-level directories that contain source code (not config, not tests) + +For very large repos, prioritize by: +1. Imported-from count (a module imported by many is core) +2. LOC (bigger modules usually warrant their own doc) +3. Mentions in README / top-level docs + +State the module list to the user before generating per-module docs on big repos — gives them a chance to redirect. + +### 4. Write `README.md` + +`read_file` the actual project README plus the top 2–3 entry-point files. Then `write_file`: + +````markdown +# + + + +## Key Concepts + +- **** — +- **** — + +## Entry Points + +- [`path/to/main.py`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) — +- [`path/to/cli.py`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) — + +## High-Level Architecture + +<2-3 sentences. Detail goes in architecture.md.> + +See [architecture.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/architecture.md). 
+ +## Module Map + +| Module | Purpose | +|---|---| +| [``](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/modules/.md) | | + +## Getting Started + +See [getting-started.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/getting-started.md). +```` + +For link targets in local mode use relative paths. For cloned repos use `https://github.com///blob//` so links survive future commits. + +### 5. Write `architecture.md` + +````markdown +# Architecture + +<2-3 paragraphs: shape of the system. What talks to what. Where data enters, +where it exits, where state lives.> + +## Components + +- **** — <1-2 sentences>. See [`modules/.md`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/modules/.md). + +## System Diagram + +```mermaid +flowchart TD + User([User]) --> Entry[Entry Point] + Entry --> Core[Core Engine] + Core --> StorageA[(Database)] + Core --> ExternalAPI{{External API}} +``` + +## Data Flow + +1. **** — [``](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) +2. **** — [``](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) + +## Key Design Decisions + +- +```` + +**Mermaid shape semantics:** +- `[]` = component +- `[()]` = database / storage +- `{{}}` = external service +- `(())` = entry point or terminal +- `-->` = sync call, `-.->` = async/event + +Cap at ~20 nodes per diagram. Split into sub-diagrams if larger. + +### 6. Write per-module docs in `modules/` + +For each selected module, inspect its layout with `ls`, identify 3–5 most important files (by size, by being named `core.py` / `main.py` / `__init__.py`, by being imported a lot), then `read_file` those files (use `offset` / `limit` to read only what you need; prefer `search_files` for specific symbols). 
+ +````markdown +# Module: `` + +<1-2 sentence purpose.> + +## Responsibilities + +- +- + +## Key Files + +- [`/`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) — + +## Public API + + + +## Internal Structure + + + +## Dependencies + +- **Used by:** +- **Uses:** + +## Notable Patterns / Gotchas + +- +```` + +### 7. Write `diagrams/class-diagram.md` + +Pick the 5–10 most important classes/types. `read_file` them, then write: + +````markdown +# Class Diagram + +## Core Types + +```mermaid +classDiagram + class Agent { + +string name + +list~Tool~ tools + +chat(message) string + } + class Tool { + <> + +name string + +execute(args) any + } + Agent --> Tool : uses + Tool <|-- TerminalTool + Tool <|-- WebTool +``` + +## Notes + + +```` + +For languages without classes (Go, C, Rust): use the diagram for struct relationships, or skip class-diagram.md and explain it in prose in architecture.md. Don't force-fit. + +### 8. Write `diagrams/sequences.md` + +Pick 2–4 of the most important workflows. Trace each call path through the code (read entry point, follow function calls), then: + +````markdown +# Sequence Diagrams + +## Workflow: + +<1 sentence describing what this does and when it runs.> + +```mermaid +sequenceDiagram + participant User + participant CLI + participant Agent + participant LLM + User->>CLI: types message + CLI->>Agent: chat(message) + Agent->>LLM: API call + LLM-->>Agent: response + tool_calls + Agent->>Agent: execute tools + Agent-->>CLI: final response +``` + +### Walkthrough + +1. **User input** — [`cli.py:HermesCLI.run_session`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) +2. **Message dispatch** — [`run_agent.py:AIAgent.chat`](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/) +```` + +Don't invent participants. 
Every box must correspond to a real component the reader can find in the code. + +### 9. Write `getting-started.md` + +````markdown +# Getting Started + +## Prerequisites + + + +## Installation + +```bash + +``` + +## First Run + +```bash + +``` + +## Common Workflows + +### + + +## Configuration + +- `` — +- Env var `` — + +## Where to Go Next + +- Architecture: [architecture.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/architecture.md) +- Module reference: [README.md#module-map](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/software-development/code-wiki/README.md#module-map) +```` + +### 10. Write `api.md` (skip if not applicable) + +Only write this if the project is a library or API server. If it is: + +- Find the public API surface (`__init__.py` exports, OpenAPI specs, route handlers, exported types) +- Document each public entry with signature, parameters, return type, one-line description +- Group by category + +### 11. Write the state file + +```bash +cat > "$OUTPUT_DIR/.codewiki-state.json" </: + README.md project overview, module map + architecture.md system architecture + flowchart + getting-started.md setup, first run, workflows + modules/ per-module deep-dives + diagrams/architecture.md Mermaid flowchart + diagrams/class-diagram.md Mermaid class diagram + diagrams/sequences.md Mermaid sequence diagrams +``` + +If you cloned to a temp dir, remind the user it can be removed (`rm -rf "$WIKI_TMP"`) after they've reviewed the wiki. + +## Scope Control + +Generating a full wiki for a 500K-LOC monorepo is wildly token-expensive. 
Default to bounded scope: + +- Initial scan: max depth 3 directories +- Per-module docs: cap at 10 modules unless user expands scope +- Per-file reads: prefer `search_files` for symbols + `read_file` with `offset`/`limit` over full reads +- Skip vendored code (`vendor/`, `third_party/`, generated code, `_pb2.py`, `.min.js`) + +If the user says "do the whole thing exhaustively", believe them — but ballpark the cost first: "this repo has ~340 source files, comprehensive coverage will be expensive — confirm?" + +## Re-Run / Update + +If `.codewiki-state.json` already exists at the target path: + +- Read it for previous SHA and module list +- If source SHA matches: ask user if they want to regenerate or skip +- If SHA differs: offer to regenerate only modules with changed files (`git diff --name-only <previous-sha> HEAD`) + +Full incremental-regeneration is a future enhancement — for now, regenerating the whole thing is acceptable. + +## Pitfalls + +- **Fabricating components.** Every diagram node and claimed function call must be in the source. `read_file` before writing. The single biggest failure mode for auto-generated docs is plausible-sounding fabrication. +- **Generic AI prose.** "This module is responsible for..." is content-free. Say what the module actually does in domain-specific terms. +- **Restating code as prose.** A module doc that says "the `process` function processes things by calling `process_item` on each item" is worse than just linking to the function. +- **Mermaid > 50 nodes.** They don't render legibly. Split them. +- **Documenting tests, generated code, or vendored deps as if they were product code.** Skip them. +- **In-repo output without asking.** Default is `~/.hermes/wikis/`. Only write into the repo when the user explicitly requests it. +- **Mermaid special chars need quotes:** `A["Tool / Agent"]` not `A[Tool / Agent]`. `<br/>
` for line breaks inside a node. +- **Nested code fences in SKILL.md.** When writing a markdown example that contains a Mermaid block, use 4-backtick outer fences so the 3-backtick inner ` ```mermaid ` doesn't close the outer. (This SKILL.md does it.) +- **classDiagram generics** render as `~T~` (e.g. `List~Tool~`), not `<T>`. +- **GitHub Mermaid theme is fixed** — don't include `%%{init: ...}%%` blocks; they're stripped on render. + +## Verification + +After writing, verify: + +1. **Mermaid blocks balance** — opens equal closes per file: + ```bash + for f in "$OUTPUT_DIR"/diagrams/*.md "$OUTPUT_DIR"/architecture.md; do + opens=$(grep -c '^```mermaid' "$f") + total=$(grep -c '^```' "$f") + echo "$f: $opens mermaid blocks, $total total fences (expect total = opens*2)" + done + ``` +2. **All expected files exist** — + ```bash + ls "$OUTPUT_DIR"/{README.md,architecture.md,getting-started.md,.codewiki-state.json} \ + "$OUTPUT_DIR"/modules/ "$OUTPUT_DIR"/diagrams/ + ``` +3. **Module count matches what you intended** — `ls "$OUTPUT_DIR/modules" | wc -l` should equal the number of modules you committed to in Step 3. +4. **No fabricated paths** — sanity-check 2–3 source links resolve to real files. diff --git a/website/sidebars.ts b/website/sidebars.ts index b0cd3a470fd..a3c41cbf205 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -589,6 +589,7 @@ const sidebars: SidebarsConfig = { key: 'skills-optional-software-development', collapsed: true, items: [ + 'user-guide/skills/optional/software-development/software-development-code-wiki', 'user-guide/skills/optional/software-development/software-development-rest-graphql-debug', ], },