feat(skill): darwinian-evolver optional skill

Thin wrapper around Imbue's darwinian_evolver (AGPL-3.0, subprocess-only). Ships a working OpenRouter driver (parrot_openrouter.py), a snapshot inspector (show_snapshot.py), and a custom-problem template. SKILL.md has a 58-char description and Pitfalls sourced from actually running the loop: non-viable seed trap, Azure content filter killing runs, loop.run() being a generator, nested-pickle snapshots, and aggressive default concurrency. Salvaged from #12719 by @Bihruze — the original PR shipped 12,289 LOC across 61 files (29 Python modules, FastAPI dashboard, VS Code extension, benchmark hub, marketplace, etc.), which was far beyond the scope of the underlying issue (#336). This version stays at the ~700-LOC scope that the issue actually asked for. Authorship of the original effort is credited via the AUTHOR_MAP entry and the SKILL.md author field. Verified end-to-end: seed 'Say {{ phrase }}' (score 0.000) evolved into 'Please repeat the following phrase exactly as it is, without any modifications or additional formatting: {{ phrase }}' (score 0.750) across 3 iterations on gpt-4o-mini via OpenRouter. Co-authored-by: Bihruze <98262967+Bihruze@users.noreply.github.com>
2026-05-18 04:41:56 +00:00 · 2026-05-15 21:54:56 -07:00 · 2026-05-15 21:54:56 -07:00 · c9b32a654c
commit c9b32a654c
parent e377833fa6
5 changed files with 828 additions and 0 deletions
--- a/tests/skills/test_darwinian_evolver_skill.py
+++ b/tests/skills/test_darwinian_evolver_skill.py
@ -0,0 +1,102 @@
+"""
+Smoke tests for the darwinian-evolver optional skill.
+
+We can't actually run the evolution loop in CI (it needs network + a paid LLM),
+so these tests verify:
+  - SKILL.md frontmatter conforms to the hardline format
+  - shipped scripts parse as valid Python
+  - the scripts reference the right env var / module paths
+"""
+from __future__ import annotations
+
+import ast
+import re
+from pathlib import Path
+
+import pytest
+import yaml
+
+# Directory of the skill under test: <repo>/optional-skills/research/darwinian-evolver,
+# where <repo> is two levels above the directory that holds this test file.
+SKILL_DIR = Path(__file__).resolve().parents[2].joinpath(
+    "optional-skills", "research", "darwinian-evolver"
+)
+
+
+@pytest.fixture(scope="module")
+def frontmatter() -> dict:
+    """Parse and return the YAML frontmatter of the skill's SKILL.md.
+
+    Module-scoped, so the file is read and parsed once for every test in this
+    module. Fails if SKILL.md does not open with a ``---`` delimited block, or
+    if that block does not parse to a mapping.
+    """
+    # Decode explicitly as UTF-8 rather than with the locale default, so the
+    # parsed text does not depend on the machine running the tests.
+    text = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
+    # No re.MULTILINE, so "^" anchors at the very start of the file; DOTALL
+    # lets the lazy group span the multi-line YAML up to the first "\n---".
+    match = re.search(r"^---\n(.*?)\n---", text, re.DOTALL)
+    assert match, "SKILL.md missing YAML frontmatter"
+    data = yaml.safe_load(match.group(1))
+    # An empty or scalar block would otherwise surface later as a confusing
+    # TypeError when a test indexes into the result.
+    assert isinstance(data, dict), "SKILL.md frontmatter must be a YAML mapping"
+    return data
+
+
+def test_skill_dir_exists() -> None:
+    """The skill directory exists at the location the other tests read from."""
+    present = SKILL_DIR.is_dir()
+    assert present, f"missing skill dir: {SKILL_DIR}"
+
+
+def test_skill_md_present() -> None:
+    """SKILL.md exists as a file inside the skill directory."""
+    skill_md = SKILL_DIR / "SKILL.md"
+    assert skill_md.is_file()
+
+
+def test_description_under_60_chars(frontmatter) -> None:
+    """The frontmatter description is at most 60 characters long."""
+    desc = frontmatter["description"]
+    too_long = len(desc) > 60
+    assert not too_long, f"description is {len(desc)} chars (hardline ≤60): {desc!r}"
+
+
+def test_name_matches_dir(frontmatter) -> None:
+    """The frontmatter ``name`` is exactly ``darwinian-evolver``."""
+    expected = "darwinian-evolver"
+    assert frontmatter["name"] == expected
+
+
+def test_platforms_excludes_windows(frontmatter) -> None:
+    """Platforms must include linux and macos and must not list windows."""
+    # Upstream uses func_timeout (POSIX signals) and uv subprocess pipelines, so
+    # the skill is gated to [linux, macos]. If it is ever ported to Windows,
+    # change this test to assert ["linux", "macos", "windows"].
+    declared = frontmatter["platforms"]
+    assert "windows" not in declared
+    required = {"linux", "macos"}
+    assert required <= set(declared)
+
+
+def test_author_credits_contributor(frontmatter) -> None:
+    """The frontmatter ``author`` field contains the contributor's handle."""
+    author = frontmatter["author"]
+    credited = "Bihruze" in author
+    assert credited, f"author should credit the original contributor: {author!r}"
+
+
+def test_license_mit(frontmatter) -> None:
+    """The skill's own frontmatter declares the MIT license."""
+    declared = frontmatter["license"]
+    assert declared == "MIT"
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "scripts/parrot_openrouter.py",
+        "scripts/show_snapshot.py",
+        "templates/custom_problem_template.py",
+    ],
+)
+def test_shipped_scripts_parse(path: str) -> None:
+    """Each shipped Python file parses; nothing is imported or executed.
+
+    Args:
+        path: File location relative to the skill directory.
+    """
+    # Hand ast.parse the raw bytes so the source encoding is detected from the
+    # file itself instead of being decoded with the locale default. Passing
+    # the filename makes a SyntaxError name the offending file.
+    source = (SKILL_DIR / path).read_bytes()
+    ast.parse(source, filename=path)  # raises SyntaxError on broken Python
+
+
+def test_parrot_script_uses_openrouter() -> None:
+    """The parrot driver source names the OpenRouter key, endpoint and model override.
+
+    These are plain substring checks on the script text; the script is not run.
+    """
+    # Explicit UTF-8 keeps the read independent of the locale's default encoding.
+    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
+    assert "OPENROUTER_API_KEY" in src, "parrot driver should read OPENROUTER_API_KEY"
+    assert "openrouter.ai/api/v1" in src, "parrot driver should target OpenRouter"
+    assert "EVOLVER_MODEL" in src, "model should be overridable via EVOLVER_MODEL"
+
+
+def test_parrot_script_has_error_swallowing() -> None:
+    """Provider content-filter / rate-limit must not kill the run — see Pitfall 2.
+
+    Only checks that the ``LLM_ERROR`` marker appears in the script text; the
+    error path itself is not exercised.
+    """
+    # Explicit UTF-8 keeps the read independent of the locale's default encoding.
+    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
+    assert "LLM_ERROR" in src, "_prompt_llm should swallow provider errors and tag them"
+
+
+def test_skill_calls_out_agpl(frontmatter) -> None:
+    """The upstream tool is AGPL-3.0. The skill MUST flag this so users don't
+    import it into MIT-licensed code by accident.
+
+    The check is a substring search over the whole of SKILL.md.
+    """
+    # `frontmatter` is not read here; requesting it still runs the fixture, so
+    # this test also errors when SKILL.md has no parseable frontmatter.
+    # Explicit UTF-8 keeps the read independent of the locale's default encoding.
+    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
+    assert "AGPL" in src, "SKILL.md must mention upstream AGPL license"
+
+
+def test_skill_pitfalls_section_present() -> None:
+    """SKILL.md has a Pitfalls heading and mentions two specific pitfalls.
+
+    All three checks are substring searches over the raw Markdown text.
+    """
+    # Explicit UTF-8 keeps the read independent of the locale's default encoding.
+    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
+    assert "## Pitfalls" in src
+    # Pitfalls we discovered during the spike — keep them in sync with reality.
+    assert "Initial organism must be viable" in src
+    assert "generator" in src  # loop.run() pitfall