From ded194eb6aca6c7999589168b19d139d1b785316 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 9 May 2026 18:44:12 -0700 Subject: [PATCH] chore(skills): move heavy training skills + outlines to optional-skills (#22912) These skills require heavy GPU/CUDA stacks or are niche enough that they shouldn't be active by default. Moved to optional-skills/ where users opt-in via `hermes skills install official/...`. Moved: - mlops/training/axolotl - mlops/training/trl-fine-tuning - mlops/training/unsloth - mlops/inference/outlines Counts: 91 -> 87 built-in, 72 -> 76 optional. Auto-regenerated docs (per-skill pages + catalogs) reflect the move. --- .../mlops/inference/outlines/SKILL.md | 0 .../inference/outlines/references/backends.md | 0 .../inference/outlines/references/examples.md | 0 .../outlines/references/json_generation.md | 0 .../mlops/training/axolotl/SKILL.md | 0 .../mlops/training/axolotl/references/api.md | 0 .../axolotl/references/dataset-formats.md | 0 .../mlops/training/axolotl/references/index.md | 0 .../mlops/training/axolotl/references/other.md | 0 .../mlops/training/trl-fine-tuning/SKILL.md | 0 .../trl-fine-tuning/references/dpo-variants.md | 0 .../trl-fine-tuning/references/grpo-training.md | 0 .../trl-fine-tuning/references/online-rl.md | 0 .../references/reward-modeling.md | 0 .../trl-fine-tuning/references/sft-training.md | 0 .../templates/basic_grpo_training.py | 0 .../mlops/training/unsloth/SKILL.md | 0 .../mlops/training/unsloth/references/index.md | 0 .../training/unsloth/references/llms-full.md | 0 .../training/unsloth/references/llms-txt.md | 0 .../mlops/training/unsloth/references/llms.md | 0 .../docs/reference/optional-skills-catalog.md | 4 ++++ website/docs/reference/skills-catalog.md | 4 ---- .../mlops/mlops-inference-outlines.md | 4 ++-- .../mlops/mlops-training-axolotl.md | 4 ++-- .../mlops/mlops-training-trl-fine-tuning.md | 16 ++++++++-------- .../mlops/mlops-training-unsloth.md | 4 ++-- 27 files changed, 18 insertions(+), 18 deletions(-) rename {skills => optional-skills}/mlops/inference/outlines/SKILL.md (100%) rename {skills => optional-skills}/mlops/inference/outlines/references/backends.md (100%) rename {skills => optional-skills}/mlops/inference/outlines/references/examples.md (100%) rename {skills => optional-skills}/mlops/inference/outlines/references/json_generation.md (100%) rename {skills => optional-skills}/mlops/training/axolotl/SKILL.md (100%) rename {skills => optional-skills}/mlops/training/axolotl/references/api.md (100%) rename {skills => optional-skills}/mlops/training/axolotl/references/dataset-formats.md (100%) rename {skills => optional-skills}/mlops/training/axolotl/references/index.md (100%) rename {skills => optional-skills}/mlops/training/axolotl/references/other.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/SKILL.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/references/dpo-variants.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/references/grpo-training.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/references/online-rl.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/references/reward-modeling.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/references/sft-training.md (100%) rename {skills => optional-skills}/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py (100%) rename {skills => optional-skills}/mlops/training/unsloth/SKILL.md (100%) rename {skills => optional-skills}/mlops/training/unsloth/references/index.md (100%) rename {skills => optional-skills}/mlops/training/unsloth/references/llms-full.md (100%) rename {skills => optional-skills}/mlops/training/unsloth/references/llms-txt.md (100%) rename {skills => optional-skills}/mlops/training/unsloth/references/llms.md (100%) rename website/docs/user-guide/skills/{bundled => optional}/mlops/mlops-inference-outlines.md (99%) rename website/docs/user-guide/skills/{bundled => optional}/mlops/mlops-training-axolotl.md (97%) rename website/docs/user-guide/skills/{bundled => optional}/mlops/mlops-training-trl-fine-tuning.md (86%) rename website/docs/user-guide/skills/{bundled => optional}/mlops/mlops-training-unsloth.md (95%) diff --git a/skills/mlops/inference/outlines/SKILL.md b/optional-skills/mlops/inference/outlines/SKILL.md similarity index 100% rename from skills/mlops/inference/outlines/SKILL.md rename to optional-skills/mlops/inference/outlines/SKILL.md diff --git a/skills/mlops/inference/outlines/references/backends.md b/optional-skills/mlops/inference/outlines/references/backends.md similarity index 100% rename from skills/mlops/inference/outlines/references/backends.md rename to optional-skills/mlops/inference/outlines/references/backends.md diff --git a/skills/mlops/inference/outlines/references/examples.md b/optional-skills/mlops/inference/outlines/references/examples.md similarity index 100% rename from skills/mlops/inference/outlines/references/examples.md rename to optional-skills/mlops/inference/outlines/references/examples.md diff --git a/skills/mlops/inference/outlines/references/json_generation.md b/optional-skills/mlops/inference/outlines/references/json_generation.md similarity index 100% rename from skills/mlops/inference/outlines/references/json_generation.md rename to optional-skills/mlops/inference/outlines/references/json_generation.md diff --git a/skills/mlops/training/axolotl/SKILL.md b/optional-skills/mlops/training/axolotl/SKILL.md similarity index 100% rename from skills/mlops/training/axolotl/SKILL.md rename to optional-skills/mlops/training/axolotl/SKILL.md diff --git a/skills/mlops/training/axolotl/references/api.md b/optional-skills/mlops/training/axolotl/references/api.md similarity index 100% rename from skills/mlops/training/axolotl/references/api.md rename to optional-skills/mlops/training/axolotl/references/api.md diff --git a/skills/mlops/training/axolotl/references/dataset-formats.md b/optional-skills/mlops/training/axolotl/references/dataset-formats.md similarity index 100% rename from skills/mlops/training/axolotl/references/dataset-formats.md rename to optional-skills/mlops/training/axolotl/references/dataset-formats.md diff --git a/skills/mlops/training/axolotl/references/index.md b/optional-skills/mlops/training/axolotl/references/index.md similarity index 100% rename from skills/mlops/training/axolotl/references/index.md rename to optional-skills/mlops/training/axolotl/references/index.md diff --git a/skills/mlops/training/axolotl/references/other.md b/optional-skills/mlops/training/axolotl/references/other.md similarity index 100% rename from skills/mlops/training/axolotl/references/other.md rename to optional-skills/mlops/training/axolotl/references/other.md diff --git a/skills/mlops/training/trl-fine-tuning/SKILL.md b/optional-skills/mlops/training/trl-fine-tuning/SKILL.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/SKILL.md rename to optional-skills/mlops/training/trl-fine-tuning/SKILL.md diff --git a/skills/mlops/training/trl-fine-tuning/references/dpo-variants.md b/optional-skills/mlops/training/trl-fine-tuning/references/dpo-variants.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/references/dpo-variants.md rename to optional-skills/mlops/training/trl-fine-tuning/references/dpo-variants.md diff --git a/skills/mlops/training/trl-fine-tuning/references/grpo-training.md b/optional-skills/mlops/training/trl-fine-tuning/references/grpo-training.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/references/grpo-training.md rename to optional-skills/mlops/training/trl-fine-tuning/references/grpo-training.md diff --git a/skills/mlops/training/trl-fine-tuning/references/online-rl.md b/optional-skills/mlops/training/trl-fine-tuning/references/online-rl.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/references/online-rl.md rename to optional-skills/mlops/training/trl-fine-tuning/references/online-rl.md diff --git a/skills/mlops/training/trl-fine-tuning/references/reward-modeling.md b/optional-skills/mlops/training/trl-fine-tuning/references/reward-modeling.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/references/reward-modeling.md rename to optional-skills/mlops/training/trl-fine-tuning/references/reward-modeling.md diff --git a/skills/mlops/training/trl-fine-tuning/references/sft-training.md b/optional-skills/mlops/training/trl-fine-tuning/references/sft-training.md similarity index 100% rename from skills/mlops/training/trl-fine-tuning/references/sft-training.md rename to optional-skills/mlops/training/trl-fine-tuning/references/sft-training.md diff --git a/skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py b/optional-skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py similarity index 100% rename from skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py rename to optional-skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py diff --git a/skills/mlops/training/unsloth/SKILL.md b/optional-skills/mlops/training/unsloth/SKILL.md similarity index 100% rename from skills/mlops/training/unsloth/SKILL.md rename to optional-skills/mlops/training/unsloth/SKILL.md diff --git a/skills/mlops/training/unsloth/references/index.md b/optional-skills/mlops/training/unsloth/references/index.md similarity index 100% rename from skills/mlops/training/unsloth/references/index.md rename to optional-skills/mlops/training/unsloth/references/index.md diff --git a/skills/mlops/training/unsloth/references/llms-full.md b/optional-skills/mlops/training/unsloth/references/llms-full.md similarity index 100% rename from skills/mlops/training/unsloth/references/llms-full.md rename to optional-skills/mlops/training/unsloth/references/llms-full.md diff --git a/skills/mlops/training/unsloth/references/llms-txt.md b/optional-skills/mlops/training/unsloth/references/llms-txt.md similarity index 100% rename from skills/mlops/training/unsloth/references/llms-txt.md rename to optional-skills/mlops/training/unsloth/references/llms-txt.md diff --git a/skills/mlops/training/unsloth/references/llms.md b/optional-skills/mlops/training/unsloth/references/llms.md similarity index 100% rename from skills/mlops/training/unsloth/references/llms.md rename to optional-skills/mlops/training/unsloth/references/llms.md diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 9743596c5aa..1cedabe4ff2 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -114,6 +114,7 @@ hermes skills uninstall | Skill | Description | |-------|-------------| | [**huggingface-accelerate**](/docs/user-guide/skills/optional/mlops/mlops-accelerate) | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch comm... | +| [**axolotl**](/docs/user-guide/skills/optional/mlops/mlops-training-axolotl) | Axolotl: YAML LLM fine-tuning (LoRA, DPO, GRPO). | | [**chroma**](/docs/user-guide/skills/optional/mlops/mlops-chroma) | Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG... | | [**clip**](/docs/user-guide/skills/optional/mlops/mlops-clip) | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks w... | | [**faiss**](/docs/user-guide/skills/optional/mlops/mlops-faiss) | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or whe... | @@ -126,6 +127,7 @@ hermes skills uninstall | [**llava**](/docs/user-guide/skills/optional/mlops/mlops-llava) | Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruct... | | [**modal-serverless-gpu**](/docs/user-guide/skills/optional/mlops/mlops-modal) | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. | | [**nemo-curator**](/docs/user-guide/skills/optional/mlops/mlops-nemo-curator) | GPU-accelerated data curation for LLM training. Supports text/image/video/audio. Features fuzzy deduplication (16× faster), quality filtering (30+ heuristics), semantic deduplication, PII redaction, NSFW detection. Scales across GPUs wit... | +| [**outlines**](/docs/user-guide/skills/optional/mlops/mlops-inference-outlines) | Outlines: structured JSON/regex/Pydantic LLM generation. | | [**peft-fine-tuning**](/docs/user-guide/skills/optional/mlops/mlops-peft) | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train <1% of parameters with minimal accuracy loss, or for multi-adapter se... | | [**pinecone**](/docs/user-guide/skills/optional/mlops/mlops-pinecone) | Managed vector database for production AI applications. Fully managed, auto-scaling, with hybrid search (dense + sparse), metadata filtering, and namespaces. Low latency (<100ms p95). Use for production RAG, recommendation systems, or se... | | [**pytorch-fsdp**](/docs/user-guide/skills/optional/mlops/mlops-pytorch-fsdp) | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 | @@ -137,6 +139,8 @@ hermes skills uninstall | [**stable-diffusion-image-generation**](/docs/user-guide/skills/optional/mlops/mlops-stable-diffusion) | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. | | [**tensorrt-llm**](/docs/user-guide/skills/optional/mlops/mlops-tensorrt-llm) | Optimizes LLM inference with NVIDIA TensorRT for maximum throughput and lowest latency. Use for production deployment on NVIDIA GPUs (A100/H100), when you need 10-100x faster inference than PyTorch, or for serving models with quantizatio... | | [**distributed-llm-pretraining-torchtitan**](/docs/user-guide/skills/optional/mlops/mlops-torchtitan) | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and dist... | +| [**fine-tuning-with-trl**](/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning) | TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF. | +| [**unsloth**](/docs/user-guide/skills/optional/mlops/mlops-training-unsloth) | Unsloth: 2-5x faster LoRA/QLoRA fine-tuning, less VRAM. | | [**whisper**](/docs/user-guide/skills/optional/mlops/mlops-whisper) | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast... | ## productivity diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 8094789bd10..c100a303514 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -120,16 +120,12 @@ If a skill is missing from this list but present in the repo, the catalog is reg | Skill | Description | Path | |-------|-------------|------| | [`audiocraft-audio-generation`](/docs/user-guide/skills/bundled/mlops/mlops-models-audiocraft) | AudioCraft: MusicGen text-to-music, AudioGen text-to-sound. | `mlops/models/audiocraft` | -| [`axolotl`](/docs/user-guide/skills/bundled/mlops/mlops-training-axolotl) | Axolotl: YAML LLM fine-tuning (LoRA, DPO, GRPO). | `mlops/training/axolotl` | | [`dspy`](/docs/user-guide/skills/bundled/mlops/mlops-research-dspy) | DSPy: declarative LM programs, auto-optimize prompts, RAG. | `mlops/research/dspy` | | [`huggingface-hub`](/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub) | HuggingFace hf CLI: search/download/upload models, datasets. | `mlops/huggingface-hub` | | [`llama-cpp`](/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp) | llama.cpp local GGUF inference + HF Hub model discovery. | `mlops/inference/llama-cpp` | | [`evaluating-llms-harness`](/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness) | lm-eval-harness: benchmark LLMs (MMLU, GSM8K, etc.). | `mlops/evaluation/lm-evaluation-harness` | | [`obliteratus`](/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus) | OBLITERATUS: abliterate LLM refusals (diff-in-means). | `mlops/inference/obliteratus` | -| [`outlines`](/docs/user-guide/skills/bundled/mlops/mlops-inference-outlines) | Outlines: structured JSON/regex/Pydantic LLM generation. | `mlops/inference/outlines` | | [`segment-anything-model`](/docs/user-guide/skills/bundled/mlops/mlops-models-segment-anything) | SAM: zero-shot image segmentation via points, boxes, masks. | `mlops/models/segment-anything` | -| [`fine-tuning-with-trl`](/docs/user-guide/skills/bundled/mlops/mlops-training-trl-fine-tuning) | TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF. | `mlops/training/trl-fine-tuning` | -| [`unsloth`](/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth) | Unsloth: 2-5x faster LoRA/QLoRA fine-tuning, less VRAM. | `mlops/training/unsloth` | | [`serving-llms-vllm`](/docs/user-guide/skills/bundled/mlops/mlops-inference-vllm) | vLLM: high-throughput LLM serving, OpenAI API, quantization. | `mlops/inference/vllm` | | [`weights-and-biases`](/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases) | W&B: log ML experiments, sweeps, model registry, dashboards. | `mlops/evaluation/weights-and-biases` | diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-outlines.md b/website/docs/user-guide/skills/optional/mlops/mlops-inference-outlines.md similarity index 99% rename from website/docs/user-guide/skills/bundled/mlops/mlops-inference-outlines.md rename to website/docs/user-guide/skills/optional/mlops/mlops-inference-outlines.md index 04d3a7c5d14..a9ec78effb2 100644 --- a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-outlines.md +++ b/website/docs/user-guide/skills/optional/mlops/mlops-inference-outlines.md @@ -14,8 +14,8 @@ Outlines: structured JSON/regex/Pydantic LLM generation. | | | |---|---| -| Source | Bundled (installed by default) | -| Path | `skills/mlops/inference/outlines` | +| Source | Optional — install with `hermes skills install official/mlops/outlines` | +| Path | `optional-skills/mlops/inference/outlines` | | Version | `1.0.0` | | Author | Orchestra Research | | License | MIT | diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-training-axolotl.md b/website/docs/user-guide/skills/optional/mlops/mlops-training-axolotl.md similarity index 97% rename from website/docs/user-guide/skills/bundled/mlops/mlops-training-axolotl.md rename to website/docs/user-guide/skills/optional/mlops/mlops-training-axolotl.md index f6a198232f6..7f0b9b80710 100644 --- a/website/docs/user-guide/skills/bundled/mlops/mlops-training-axolotl.md +++ b/website/docs/user-guide/skills/optional/mlops/mlops-training-axolotl.md @@ -14,8 +14,8 @@ Axolotl: YAML LLM fine-tuning (LoRA, DPO, GRPO). | | | |---|---| -| Source | Bundled (installed by default) | -| Path | `skills/mlops/training/axolotl` | +| Source | Optional — install with `hermes skills install official/mlops/axolotl` | +| Path | `optional-skills/mlops/training/axolotl` | | Version | `1.0.0` | | Author | Orchestra Research | | License | MIT | diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-training-trl-fine-tuning.md b/website/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning.md similarity index 86% rename from website/docs/user-guide/skills/bundled/mlops/mlops-training-trl-fine-tuning.md rename to website/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning.md index bef3c52802c..eb5d0311a47 100644 --- a/website/docs/user-guide/skills/bundled/mlops/mlops-training-trl-fine-tuning.md +++ b/website/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning.md @@ -14,8 +14,8 @@ TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF. | | | |---|---| -| Source | Bundled (installed by default) | -| Path | `skills/mlops/training/trl-fine-tuning` | +| Source | Optional — install with `hermes skills install official/mlops/trl-fine-tuning` | +| Path | `optional-skills/mlops/training/trl-fine-tuning` | | Version | `1.0.0` | | Author | Orchestra Research | | License | MIT | @@ -270,7 +270,7 @@ trl dpo \ Train with reinforcement learning using minimal memory. -For in-depth GRPO guidance — reward function design, critical training insights (loss behavior, mode collapse, tuning), and advanced multi-stage patterns — see **[references/grpo-training.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/grpo-training.md)**. A production-ready training script is in **[templates/basic_grpo_training.py](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py)**. +For in-depth GRPO guidance — reward function design, critical training insights (loss behavior, mode collapse, tuning), and advanced multi-stage patterns — see **[references/grpo-training.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/grpo-training.md)**. A production-ready training script is in **[templates/basic_grpo_training.py](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py)**. Copy this checklist: @@ -440,15 +440,15 @@ config = PPOConfig( ## Advanced topics -**SFT training guide**: See [references/sft-training.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/sft-training.md) for dataset formats, chat templates, packing strategies, and multi-GPU training. +**SFT training guide**: See [references/sft-training.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/sft-training.md) for dataset formats, chat templates, packing strategies, and multi-GPU training. -**DPO variants**: See [references/dpo-variants.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/dpo-variants.md) for IPO, cDPO, RPO, and other DPO loss functions with recommended hyperparameters. +**DPO variants**: See [references/dpo-variants.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/dpo-variants.md) for IPO, cDPO, RPO, and other DPO loss functions with recommended hyperparameters. -**Reward modeling**: See [references/reward-modeling.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/reward-modeling.md) for outcome vs process rewards, Bradley-Terry loss, and reward model evaluation. +**Reward modeling**: See [references/reward-modeling.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/reward-modeling.md) for outcome vs process rewards, Bradley-Terry loss, and reward model evaluation. -**Online RL methods**: See [references/online-rl.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/online-rl.md) for PPO, GRPO, RLOO, and OnlineDPO with detailed configurations. +**Online RL methods**: See [references/online-rl.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/online-rl.md) for PPO, GRPO, RLOO, and OnlineDPO with detailed configurations. -**GRPO deep dive**: See [references/grpo-training.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/references/grpo-training.md) for expert-level GRPO patterns — reward function design philosophy, training insights (why loss increases, mode collapse detection), hyperparameter tuning, multi-stage training, and troubleshooting. Production-ready template in [templates/basic_grpo_training.py](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py). +**GRPO deep dive**: See [references/grpo-training.md](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/references/grpo-training.md) for expert-level GRPO patterns — reward function design philosophy, training insights (why loss increases, mode collapse detection), hyperparameter tuning, multi-stage training, and troubleshooting. Production-ready template in [templates/basic_grpo_training.py](https://github.com/NousResearch/hermes-agent/blob/main/optional-skills/mlops/training/trl-fine-tuning/templates/basic_grpo_training.py). ## Hardware requirements diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth.md b/website/docs/user-guide/skills/optional/mlops/mlops-training-unsloth.md similarity index 95% rename from website/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth.md rename to website/docs/user-guide/skills/optional/mlops/mlops-training-unsloth.md index d1a012322e0..cf4566a1811 100644 --- a/website/docs/user-guide/skills/bundled/mlops/mlops-training-unsloth.md +++ b/website/docs/user-guide/skills/optional/mlops/mlops-training-unsloth.md @@ -14,8 +14,8 @@ Unsloth: 2-5x faster LoRA/QLoRA fine-tuning, less VRAM. | | | |---|---| -| Source | Bundled (installed by default) | -| Path | `skills/mlops/training/unsloth` | +| Source | Optional — install with `hermes skills install official/mlops/unsloth` | +| Path | `optional-skills/mlops/training/unsloth` | | Version | `1.0.0` | | Author | Orchestra Research | | License | MIT |