hermes-agent/tests/skills/test_darwinian_evolver_skill.py
teknium1 c9b32a654c feat(skill): darwinian-evolver optional skill
Thin wrapper around Imbue's darwinian_evolver (AGPL-3.0, subprocess-only).
Ships a working OpenRouter driver (parrot_openrouter.py), a snapshot
inspector (show_snapshot.py), and a custom-problem template. SKILL.md
has a 58-char description and Pitfalls sourced from actually running the loop:
non-viable seed trap, Azure content filter killing runs, loop.run() being
a generator, nested-pickle snapshots, and aggressive default concurrency.

Salvaged from #12719 by @Bihruze — original PR shipped 12,289 LOC across
61 files (29 Python modules, FastAPI dashboard, VS Code extension,
benchmark hub, marketplace, etc.) which was far beyond the scope of the
underlying issue (#336). This version stays at the ~700-LOC scope that
the issue actually asked for. Authorship of the original effort is credited via
AUTHOR_MAP entry and the SKILL.md author field.

Verified end-to-end: seed 'Say {{ phrase }}' (score 0.000) evolved into
'Please repeat the following phrase exactly as it is, without any
modifications or additional formatting: {{ phrase }}' (score 0.750)
across 3 iterations on gpt-4o-mini via OpenRouter.

Co-authored-by: Bihruze <98262967+Bihruze@users.noreply.github.com>
2026-05-15 21:56:07 -07:00

102 lines
3.5 KiB
Python

"""
Smoke tests for the darwinian-evolver optional skill.
We can't actually run the evolution loop in CI (it needs network + a paid LLM),
so these tests verify:
- SKILL.md frontmatter conforms to the hardline format
- shipped scripts parse as valid Python
- the scripts reference the right env var / module paths
"""
from __future__ import annotations
import ast
import re
from pathlib import Path
import pytest
import yaml
# Absolute path of the skill under test: two directory levels above this
# file's own directory, then optional-skills/research/darwinian-evolver.
SKILL_DIR = Path(__file__).resolve().parents[2].joinpath(
    "optional-skills", "research", "darwinian-evolver"
)
@pytest.fixture(scope="module")
def frontmatter() -> dict:
    """Parse and return the YAML frontmatter of the skill's SKILL.md.

    Module-scoped, so SKILL.md is read and parsed once for every test in this
    module that requests the fixture.

    Returns:
        The mapping loaded by ``yaml.safe_load`` from the text between the
        leading ``---`` line and the next ``---``.

    Raises:
        AssertionError: if SKILL.md does not start with a frontmatter block,
            or if that block does not load as a YAML mapping.
    """
    # Decode explicitly as UTF-8: the platform default encoding (for example
    # cp1252) can fail on non-ASCII characters in the markdown.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    # re.MULTILINE is deliberately absent, so ``^`` only matches at the very
    # start of the file; DOTALL lets ``.*?`` span the frontmatter's lines.
    m = re.search(r"^---\n(.*?)\n---", src, re.DOTALL)
    assert m, "SKILL.md missing YAML frontmatter"
    data = yaml.safe_load(m.group(1))
    # An empty block loads as None and a scalar/list as a non-dict; fail here
    # with a clear message rather than with a TypeError in a dependent test.
    assert isinstance(data, dict), "SKILL.md frontmatter must be a YAML mapping"
    return data
def test_skill_dir_exists() -> None:
    """The skill directory must exist at the path held in SKILL_DIR."""
    skill_dir_found = SKILL_DIR.is_dir()
    assert skill_dir_found, f"missing skill dir: {SKILL_DIR}"
def test_skill_md_present() -> None:
    """SKILL.md must be a regular file directly inside the skill directory."""
    manifest = SKILL_DIR.joinpath("SKILL.md")
    assert manifest.is_file()
def test_description_under_60_chars(frontmatter) -> None:
    """The frontmatter ``description`` must be at most 60 characters long."""
    text = frontmatter["description"]
    length = len(text)
    assert length <= 60, f"description is {length} chars (hardline ≤60): {text!r}"
def test_name_matches_dir(frontmatter) -> None:
    """The frontmatter ``name`` must equal the skill's directory name."""
    expected_name = "darwinian-evolver"
    assert frontmatter["name"] == expected_name
def test_platforms_excludes_windows(frontmatter) -> None:
    """``platforms`` must list linux and macos and must not list windows."""
    # The upstream tool relies on func_timeout (POSIX signals) and uv
    # subprocess pipelines, so the skill is gated to [linux, macos]. If a
    # Windows port ever lands, change this test to assert
    # ["linux", "macos", "windows"].
    platforms = frontmatter["platforms"]
    assert "windows" not in platforms
    assert {"linux", "macos"} <= set(platforms)
def test_author_credits_contributor(frontmatter) -> None:
    """The frontmatter ``author`` field must mention the contributor "Bihruze"."""
    author_field = frontmatter["author"]
    is_credited = "Bihruze" in author_field
    assert is_credited, f"author should credit the original contributor: {author_field!r}"
def test_license_mit(frontmatter) -> None:
    """The frontmatter must declare the skill's own license as MIT."""
    declared_license = frontmatter["license"]
    assert declared_license == "MIT"
@pytest.mark.parametrize(
    "path",
    [
        "scripts/parrot_openrouter.py",
        "scripts/show_snapshot.py",
        "templates/custom_problem_template.py",
    ],
)
def test_shipped_scripts_parse(path: str) -> None:
    """Each shipped script or template must be syntactically valid Python.

    The file is only parsed with ``ast.parse``; nothing is imported or run.

    Args:
        path: Location of the file to check, relative to the skill directory.
    """
    script = SKILL_DIR / path
    # Python source is UTF-8 by default, so decode it as UTF-8 instead of with
    # the platform's locale encoding.
    src = script.read_text(encoding="utf-8")
    # Passing the filename makes a SyntaxError name the offending file rather
    # than "<unknown>".
    ast.parse(src, filename=str(script))  # raises SyntaxError on broken Python
def test_parrot_script_uses_openrouter() -> None:
    """The parrot driver's source must reference its OpenRouter wiring.

    These are substring checks on the script text; the script is not executed.
    """
    # Python source is UTF-8 by default, so decode it as UTF-8 instead of with
    # the platform's locale encoding.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "OPENROUTER_API_KEY" in src, "parrot driver should read OPENROUTER_API_KEY"
    assert "openrouter.ai/api/v1" in src, "parrot driver should target OpenRouter"
    assert "EVOLVER_MODEL" in src, "model should be overridable via EVOLVER_MODEL"
def test_parrot_script_has_error_swallowing() -> None:
    """Provider content-filter / rate-limit must not kill the run — see Pitfall 2.

    Checked only by looking for the ``LLM_ERROR`` marker in the script text.
    """
    # Python source is UTF-8 by default, so decode it as UTF-8 instead of with
    # the platform's locale encoding.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "LLM_ERROR" in src, "_prompt_llm should swallow provider errors and tag them"
def test_skill_calls_out_agpl(frontmatter) -> None:
    """The upstream tool is AGPL-3.0. The skill MUST flag this so users don't
    import it into MIT-licensed code by accident."""
    # NOTE: the `frontmatter` fixture is requested but not used in the body;
    # the check below scans the whole SKILL.md text. The parameter is kept so
    # the test's signature is unchanged.
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, which can fail on non-ASCII characters in the markdown.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    assert "AGPL" in src, "SKILL.md must mention upstream AGPL license"
def test_skill_pitfalls_section_present() -> None:
    """SKILL.md must contain a Pitfalls section with the known pitfalls.

    These are substring checks against the raw markdown text.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, which can fail on non-ASCII characters in the markdown.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    assert "## Pitfalls" in src
    # Pitfalls we discovered during the spike — keep them in sync with reality.
    assert "Initial organism must be viable" in src
    assert "generator" in src  # loop.run() pitfall