Add Tinker RL training integration and documentation

- pyproject.toml: Added tinker SDK, torch, wandb, math-verify to [atropos] extras
- README.md: Added comprehensive RL Training with Tinker section including:
  - Architecture diagram (3-process pipeline)
  - Quick start guide for GSM8k agent training
  - Configuration documentation
  - RL CLI usage
  - Sandbox backend options (Nomad, Singularity, Modal)

New files in tinker-atropos submodule (committed there):
- tinker_atropos/environments/gsm8k_agent.py: Agent GSM8k env with Python REPL tool
- configs/gsm8k_agent.yaml: Config for Qwen3-4B training
2026-05-03 02:11:48 +00:00 · 2026-02-09 01:36:20 +00:00 · 2026-02-09 01:36:20 +00:00 · 3b9c53e6db
commit 3b9c53e6db
parent 05dd31131f
2 changed files with 136 additions and 1 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,14 +40,18 @@ dev = ["pytest", "pytest-asyncio"]
 messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0"]
 cron = ["croniter"]
 cli = ["simple-term-menu"]
-# Install Atropos from source (PyPI is often stale for this internal dependency).
+# Install Atropos + Tinker training integration from source.
 atropos = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
+  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
  # Atropos integration runtime deps (kept optional for Hermes-only users)
  "aiohttp",
  "fastapi",
  "uvicorn",
  "pyte",
+  "torch",
+  "wandb",
+  "math-verify",
 ]
 all = [
  "hermes-agent[modal]",