From ace4b722dc2ba716b1beb9de5b681453b301457d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 22:02:41 -0700 Subject: [PATCH] =?UTF-8?q?feat(skills):=20add=20simplify-code=20skill=20?= =?UTF-8?q?=E2=80=94=20parallel=203-agent=20code=20review=20and=20cleanup?= =?UTF-8?q?=20(#41691)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inspired by Claude Code's /simplify. A bundled skill that captures recent changes via git diff, fans out three focused reviewers (reuse, quality, efficiency) via delegate_task batch mode, then aggregates findings and applies the fixes worth applying. Zero core changes — orchestrates existing tools (terminal/git, search_files, delegate_task). Supports focus, dry-run, and scoped-diff modifiers. Closes #379. --- .../simplify-code/SKILL.md | 175 ++++++++++++++++ website/docs/reference/skills-catalog.md | 1 + .../software-development-simplify-code.md | 193 ++++++++++++++++++ website/sidebars.ts | 1 + 4 files changed, 370 insertions(+) create mode 100644 skills/software-development/simplify-code/SKILL.md create mode 100644 website/docs/user-guide/skills/bundled/software-development/software-development-simplify-code.md diff --git a/skills/software-development/simplify-code/SKILL.md b/skills/software-development/simplify-code/SKILL.md new file mode 100644 index 00000000000..63c3e11cefa --- /dev/null +++ b/skills/software-development/simplify-code/SKILL.md @@ -0,0 +1,175 @@ +--- +name: simplify-code +description: "Parallel 3-agent cleanup of recent code changes." 
+version: 1.0.0 +author: Hermes Agent (inspired by Claude Code /simplify) +license: MIT +platforms: [linux, macos, windows] +metadata: + hermes: + tags: [code-review, cleanup, refactor, delegation, subagent, parallel, simplify] + related_skills: [requesting-code-review, test-driven-development, plan] +--- + +# Simplify Code — Parallel Review & Cleanup + +Review your recent code changes with three focused reviewers running in +parallel, aggregate their findings, and apply the fixes worth applying. + +**Core principle:** Three narrow reviewers beat one broad reviewer. Each one +deeply searches the codebase for a single class of problem — reuse, quality, +efficiency — without diluting its attention across all three. They run +concurrently, so you pay the latency of one review, not three. + +## When to Use + +Trigger this skill when the user says any of: + +- "simplify" / "simplify my changes" / "simplify these changes" +- "review my code" / "review my recent changes" / "clean up my changes" +- "/simplify" (if they're carrying the Claude Code habit over) + +Optional modifiers the user may add — honor them: + +- **Focus:** "simplify focus on efficiency" → run only the efficiency reviewer + (or weight the aggregation toward it). Recognized focuses: `reuse`, + `quality`, `efficiency`. +- **Dry run:** "simplify but don't change anything" / "just report" → run the + three reviewers, present findings, apply NOTHING. Ask before applying. +- **Scope:** "simplify the last commit" / "simplify staged" / "simplify + src/foo.py" → narrow the diff source accordingly (see Phase 1). + +Do NOT auto-run this after every edit. It costs three subagents' worth of +tokens — invoke it only when the user explicitly asks. + +## The Process + +### Phase 1 — Identify the changes + +Capture the diff to review. Pick the source by what the user asked for, in +this default order: + +```bash +# 1. Default: uncommitted working-tree changes (tracked files) +git diff + +# 2. 
If that's empty, include staged changes +git diff HEAD + +# 3. Scoped variants the user may request: +git diff --staged # "staged changes" +git diff HEAD~1 HEAD # "the last commit" +git diff main...HEAD # "this branch" / "my PR" +git diff -- src/foo.py # specific file(s) +``` + +If `git diff` and `git diff HEAD` are both empty, or there is no git repo at +all, fall back to the files the user explicitly named or that were +recently created/edited in this session. If you genuinely can't find any +changed code, say so and stop — there's nothing to simplify. + +Capture the full diff text. Note its size: if it's very large (say >2000 +changed lines), warn the user that three subagents each carrying the full diff +will be token-heavy, and offer to scope it down (per-directory, per-commit) +before proceeding. + +### Phase 2 — Launch three reviewers in parallel + +Use `delegate_task` **batch mode** — pass all three tasks in one `tasks` +array so they run concurrently. Three is the right fan-out for this pattern; +it's well within the `delegation.max_concurrent_children` budget on any +default install. + +Give **every** reviewer the **complete diff** (not fragments — cross-file +issues hide in the gaps) plus the absolute repo path so they can search the +wider codebase. Each reviewer gets `terminal`, `file`, and `search` +toolsets (so they can `git`, `read_file`, and `search_files`/grep). + +Tell each reviewer to: +- Search the existing codebase for evidence (don't reason from the diff alone). +- Report findings as a concrete list: `file:line → problem → suggested fix`. +- Rank each finding `high` / `medium` / `low` confidence. +- Skip nits and style-only churn. Only flag things that materially improve + the code. + +Pass these three goals (drop any the user's focus excludes): + +**Reviewer 1 — Code Reuse** +> Review this diff for code that duplicates functionality already in the +> codebase. 
Search utility modules, shared helpers, and adjacent files +> (use search_files / grep) for existing functions, constants, or patterns +> the new code could call instead of reimplementing. Flag: new functions +> that duplicate existing ones; hand-rolled logic that an existing utility +> already does (manual string/path manipulation, custom env checks, ad-hoc +> type guards, re-implemented parsing). For each, name the existing thing to +> use and where it lives. + +**Reviewer 2 — Code Quality** +> Review this diff for quality problems. Look for: redundant state (values +> that duplicate or could be derived from existing state; caches that don't +> need to exist); parameter sprawl (new params bolted on where the function +> should have been restructured); copy-paste-with-variation (near-duplicate +> blocks that should share an abstraction); leaky abstractions (exposing +> internals, breaking an existing encapsulation boundary); stringly-typed +> code (raw strings where a constant/enum/registry already exists — check the +> canonical registries before flagging). For each, give the concrete refactor. + +**Reviewer 3 — Efficiency** +> Review this diff for efficiency problems. Look for: unnecessary work +> (redundant computation, repeated file reads, duplicate API calls, N+1 +> access patterns); missed concurrency (independent ops run sequentially); +> hot-path bloat (heavy/blocking work on startup or per-request paths); +> TOCTOU anti-patterns (existence pre-checks before an op instead of doing +> the op and handling the error); memory issues (unbounded growth, missing +> cleanup, listener/handle leaks); overly broad reads (loading whole files +> when a slice would do). For each, give the concrete fix and why it's faster +> or lighter. + +### Phase 3 — Aggregate and apply + +Wait for all three to return (batch mode returns them together). + +1. **Merge** the findings into one list, deduping where reviewers overlap. +2. 
**Discard false positives** — you have the most context; you don't have to + argue with a reviewer, just drop weak or wrong suggestions silently. +3. **Resolve conflicts.** Reviewers can disagree (Reviewer 1: "use existing + util X"; Reviewer 3: "X is slow, inline it"). Default resolution order: + **correctness > the user's stated focus > readability/reuse > micro-perf.** + Don't apply a perf "fix" that hurts clarity unless the path is genuinely + hot. When two suggestions are mutually exclusive and both defensible, pick + the one that touches less code and note the alternative. +4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless + the user asked for a dry run, in which case present the list and ask first. +5. **Verify** you didn't break anything: run the project's targeted tests for + the touched files (not the full suite), and re-run any linter/type check the + repo uses. If a fix breaks a test, revert that one fix and report it. +6. **Summarize** what you changed: a short list of applied fixes grouped by + reviewer category, plus any findings you deliberately skipped and why. + +## Pitfalls + +- **Don't fan out wider than ~3.** More reviewers means more cost and more + conflicting suggestions to reconcile, not better coverage. Three categories + cover the space. +- **Give the WHOLE diff to each reviewer.** Splitting the diff across reviewers + defeats the design — cross-file duplication and N+1s only show up with the + full picture. +- **Reviewers search, they don't guess.** A reuse finding with no pointer to + the existing utility ("there's probably a helper for this") is noise. Require + `file:line` evidence; drop findings that lack it. +- **Apply ≠ rewrite.** This is cleanup of the user's recent changes, not a + license to refactor the whole module. Keep edits scoped to what the diff + touched plus the minimal surrounding change a fix requires. 
+- **Respect project conventions.** If the repo has AGENTS.md / CLAUDE.md / + HERMES.md or a linter config, fold those rules into the reviewer prompts so + suggestions match house style instead of fighting it. +- **Large diffs blow context.** If the diff is huge, scope it down before + delegating — three subagents each carrying a 5000-line diff is expensive and + may truncate. + +## Related + +If your install has the `subagent-driven-development` skill (optional), it +covers the complementary case: parallel review *during* implementation, per +task. This skill is the standalone *after-the-fact* cleanup pass. Use +`requesting-code-review` for the pre-commit security/quality gate. diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 0ecf856cf28..25325e1f6a5 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -166,6 +166,7 @@ If a skill is missing from this list but present in the repo, the catalog is reg | [`plan`](/docs/user-guide/skills/bundled/software-development/software-development-plan) | Plan mode: write an actionable markdown plan to .hermes/plans/, no execution. Bite-sized tasks, exact paths, complete code. | `software-development/plan` | | [`python-debugpy`](/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy) | Debug Python: pdb REPL + debugpy remote (DAP). | `software-development/python-debugpy` | | [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review) | Pre-commit review: security scan, quality gates, auto-fix. | `software-development/requesting-code-review` | +| [`simplify-code`](/docs/user-guide/skills/bundled/software-development/software-development-simplify-code) | Parallel 3-agent cleanup of recent code changes. 
| `software-development/simplify-code` | | [`spike`](/docs/user-guide/skills/bundled/software-development/software-development-spike) | Throwaway experiments to validate an idea before build. | `software-development/spike` | | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging) | 4-phase root cause debugging: understand bugs before fixing. | `software-development/systematic-debugging` | | [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) | TDD: enforce RED-GREEN-REFACTOR, tests before code. | `software-development/test-driven-development` | diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-simplify-code.md b/website/docs/user-guide/skills/bundled/software-development/software-development-simplify-code.md new file mode 100644 index 00000000000..51191414e7a --- /dev/null +++ b/website/docs/user-guide/skills/bundled/software-development/software-development-simplify-code.md @@ -0,0 +1,193 @@ +--- +title: "Simplify Code — Parallel 3-agent cleanup of recent code changes" +sidebar_label: "Simplify Code" +description: "Parallel 3-agent cleanup of recent code changes" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Simplify Code + +Parallel 3-agent cleanup of recent code changes. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/software-development/simplify-code` | +| Version | `1.0.0` | +| Author | Hermes Agent (inspired by Claude Code /simplify) | +| License | MIT | +| Platforms | linux, macos, windows | +| Tags | `code-review`, `cleanup`, `refactor`, `delegation`, `subagent`, `parallel`, `simplify` | +| Related skills | [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`plan`](/docs/user-guide/skills/bundled/software-development/software-development-plan) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Simplify Code — Parallel Review & Cleanup + +Review your recent code changes with three focused reviewers running in +parallel, aggregate their findings, and apply the fixes worth applying. + +**Core principle:** Three narrow reviewers beat one broad reviewer. Each one +deeply searches the codebase for a single class of problem — reuse, quality, +efficiency — without diluting its attention across all three. They run +concurrently, so you pay the latency of one review, not three. + +## When to Use + +Trigger this skill when the user says any of: + +- "simplify" / "simplify my changes" / "simplify these changes" +- "review my code" / "review my recent changes" / "clean up my changes" +- "/simplify" (if they're carrying the Claude Code habit over) + +Optional modifiers the user may add — honor them: + +- **Focus:** "simplify focus on efficiency" → run only the efficiency reviewer + (or weight the aggregation toward it). Recognized focuses: `reuse`, + `quality`, `efficiency`. 
+- **Dry run:** "simplify but don't change anything" / "just report" → run the + three reviewers, present findings, apply NOTHING. Ask before applying. +- **Scope:** "simplify the last commit" / "simplify staged" / "simplify + src/foo.py" → narrow the diff source accordingly (see Phase 1). + +Do NOT auto-run this after every edit. It costs three subagents' worth of +tokens — invoke it only when the user explicitly asks. + +## The Process + +### Phase 1 — Identify the changes + +Capture the diff to review. Pick the source by what the user asked for, in +this default order: + +```bash +# 1. Default: uncommitted working-tree changes (tracked files) +git diff + +# 2. If that's empty, include staged changes +git diff HEAD + +# 3. Scoped variants the user may request: +git diff --staged # "staged changes" +git diff HEAD~1 HEAD # "the last commit" +git diff main...HEAD # "this branch" / "my PR" +git diff -- src/foo.py # specific file(s) +``` + +If `git diff` and `git diff HEAD` are both empty, or there is no git repo at +all, fall back to the files the user explicitly named or that were +recently created/edited in this session. If you genuinely can't find any +changed code, say so and stop — there's nothing to simplify. + +Capture the full diff text. Note its size: if it's very large (say >2000 +changed lines), warn the user that three subagents each carrying the full diff +will be token-heavy, and offer to scope it down (per-directory, per-commit) +before proceeding. + +### Phase 2 — Launch three reviewers in parallel + +Use `delegate_task` **batch mode** — pass all three tasks in one `tasks` +array so they run concurrently. Three is the right fan-out for this pattern; +it's well within the `delegation.max_concurrent_children` budget on any +default install. + +Give **every** reviewer the **complete diff** (not fragments — cross-file +issues hide in the gaps) plus the absolute repo path so they can search the +wider codebase. 
Each reviewer gets `terminal`, `file`, and `search` +toolsets (so they can `git`, `read_file`, and `search_files`/grep). + +Tell each reviewer to: +- Search the existing codebase for evidence (don't reason from the diff alone). +- Report findings as a concrete list: `file:line → problem → suggested fix`. +- Rank each finding `high` / `medium` / `low` confidence. +- Skip nits and style-only churn. Only flag things that materially improve + the code. + +Pass these three goals (drop any the user's focus excludes): + +**Reviewer 1 — Code Reuse** +> Review this diff for code that duplicates functionality already in the +> codebase. Search utility modules, shared helpers, and adjacent files +> (use search_files / grep) for existing functions, constants, or patterns +> the new code could call instead of reimplementing. Flag: new functions +> that duplicate existing ones; hand-rolled logic that an existing utility +> already does (manual string/path manipulation, custom env checks, ad-hoc +> type guards, re-implemented parsing). For each, name the existing thing to +> use and where it lives. + +**Reviewer 2 — Code Quality** +> Review this diff for quality problems. Look for: redundant state (values +> that duplicate or could be derived from existing state; caches that don't +> need to exist); parameter sprawl (new params bolted on where the function +> should have been restructured); copy-paste-with-variation (near-duplicate +> blocks that should share an abstraction); leaky abstractions (exposing +> internals, breaking an existing encapsulation boundary); stringly-typed +> code (raw strings where a constant/enum/registry already exists — check the +> canonical registries before flagging). For each, give the concrete refactor. + +**Reviewer 3 — Efficiency** +> Review this diff for efficiency problems. 
Look for: unnecessary work +> (redundant computation, repeated file reads, duplicate API calls, N+1 +> access patterns); missed concurrency (independent ops run sequentially); +> hot-path bloat (heavy/blocking work on startup or per-request paths); +> TOCTOU anti-patterns (existence pre-checks before an op instead of doing +> the op and handling the error); memory issues (unbounded growth, missing +> cleanup, listener/handle leaks); overly broad reads (loading whole files +> when a slice would do). For each, give the concrete fix and why it's faster +> or lighter. + +### Phase 3 — Aggregate and apply + +Wait for all three to return (batch mode returns them together). + +1. **Merge** the findings into one list, deduping where reviewers overlap. +2. **Discard false positives** — you have the most context; you don't have to + argue with a reviewer, just drop weak or wrong suggestions silently. +3. **Resolve conflicts.** Reviewers can disagree (Reviewer 1: "use existing + util X"; Reviewer 3: "X is slow, inline it"). Default resolution order: + **correctness > the user's stated focus > readability/reuse > micro-perf.** + Don't apply a perf "fix" that hurts clarity unless the path is genuinely + hot. When two suggestions are mutually exclusive and both defensible, pick + the one that touches less code and note the alternative. +4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless + the user asked for a dry run, in which case present the list and ask first. +5. **Verify** you didn't break anything: run the project's targeted tests for + the touched files (not the full suite), and re-run any linter/type check the + repo uses. If a fix breaks a test, revert that one fix and report it. +6. **Summarize** what you changed: a short list of applied fixes grouped by + reviewer category, plus any findings you deliberately skipped and why. 
+ +## Pitfalls + +- **Don't fan out wider than ~3.** More reviewers means more cost and more + conflicting suggestions to reconcile, not better coverage. Three categories + cover the space. +- **Give the WHOLE diff to each reviewer.** Splitting the diff across reviewers + defeats the design — cross-file duplication and N+1s only show up with the + full picture. +- **Reviewers search, they don't guess.** A reuse finding with no pointer to + the existing utility ("there's probably a helper for this") is noise. Require + `file:line` evidence; drop findings that lack it. +- **Apply ≠ rewrite.** This is cleanup of the user's recent changes, not a + license to refactor the whole module. Keep edits scoped to what the diff + touched plus the minimal surrounding change a fix requires. +- **Respect project conventions.** If the repo has AGENTS.md / CLAUDE.md / + HERMES.md or a linter config, fold those rules into the reviewer prompts so + suggestions match house style instead of fighting it. +- **Large diffs blow context.** If the diff is huge, scope it down before + delegating — three subagents each carrying a 5000-line diff is expensive and + may truncate. + +## Related + +If your install has the `subagent-driven-development` skill (optional), it +covers the complementary case: parallel review *during* implementation, per +task. This skill is the standalone *after-the-fact* cleanup pass. Use +`requesting-code-review` for the pre-commit security/quality gate. 
diff --git a/website/sidebars.ts b/website/sidebars.ts index 0454b8d5363..7705ca565a0 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -331,6 +331,7 @@ const sidebars: SidebarsConfig = { 'user-guide/skills/bundled/software-development/software-development-plan', 'user-guide/skills/bundled/software-development/software-development-python-debugpy', 'user-guide/skills/bundled/software-development/software-development-requesting-code-review', + 'user-guide/skills/bundled/software-development/software-development-simplify-code', 'user-guide/skills/bundled/software-development/software-development-spike', 'user-guide/skills/bundled/software-development/software-development-systematic-debugging', 'user-guide/skills/bundled/software-development/software-development-test-driven-development',