mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(skill): darwinian-evolver optional skill
Thin wrapper around Imbue's darwinian_evolver (AGPL-3.0, subprocess-only). Ships a working OpenRouter driver (parrot_openrouter.py), a snapshot inspector (show_snapshot.py), and a custom-problem template. SKILL.md has 58-char description, Pitfalls sourced from actually running the loop: non-viable seed trap, Azure content filter killing runs, loop.run() being a generator, nested-pickle snapshots, and aggressive default concurrency. Salvaged from #12719 by @Bihruze — original PR shipped 12,289 LOC across 61 files (29 Python modules, FastAPI dashboard, VS Code extension, benchmark hub, marketplace, etc.) which was far beyond the scope of the underlying issue (#336). This version stays at the ~700-LOC scope that issue actually asked for. Authorship of the original effort credited via AUTHOR_MAP entry and the SKILL.md author field. Verified end-to-end: seed 'Say {{ phrase }}' (score 0.000) evolved into 'Please repeat the following phrase exactly as it is, without any modifications or additional formatting: {{ phrase }}' (score 0.750) across 3 iterations on gpt-4o-mini via OpenRouter. Co-authored-by: Bihruze <98262967+Bihruze@users.noreply.github.com>
This commit is contained in:
parent
e377833fa6
commit
c9b32a654c
5 changed files with 828 additions and 0 deletions
102
tests/skills/test_darwinian_evolver_skill.py
Normal file
102
tests/skills/test_darwinian_evolver_skill.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""
|
||||
Smoke tests for the darwinian-evolver optional skill.
|
||||
|
||||
We can't actually run the evolution loop in CI (it needs network + a paid LLM),
|
||||
so these tests verify:
|
||||
- SKILL.md frontmatter conforms to the hardline format
|
||||
- shipped scripts parse as valid Python
|
||||
- the scripts reference the right env var / module paths
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
SKILL_DIR = Path(__file__).resolve().parents[2] / "optional-skills" / "research" / "darwinian-evolver"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def frontmatter() -> dict:
    """Parse and return the YAML frontmatter of the skill's SKILL.md.

    The fixture is module-scoped, so the file is read and parsed once and the
    same mapping is shared by every test in this module.

    Returns:
        The frontmatter block parsed with ``yaml.safe_load``.

    Raises:
        AssertionError: if SKILL.md has no leading ``---`` delimited block, or
            if that block does not parse to a mapping.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine running the tests.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    # No re.MULTILINE: "^" only matches at the very start of the file, which is
    # where frontmatter has to live. DOTALL lets the lazy group span lines.
    m = re.search(r"^---\n(.*?)\n---", src, re.DOTALL)
    assert m, "SKILL.md missing YAML frontmatter"
    data = yaml.safe_load(m.group(1))
    # An empty block parses to None; fail here with a clear message instead of
    # letting every dependent test die on a TypeError.
    assert isinstance(data, dict), "SKILL.md frontmatter must be a YAML mapping"
    return data
|
||||
|
||||
|
||||
def test_skill_dir_exists() -> None:
    """The skill directory must be present in the checkout."""
    found = SKILL_DIR.is_dir()
    assert found, f"missing skill dir: {SKILL_DIR}"
|
||||
|
||||
|
||||
def test_skill_md_present() -> None:
    """SKILL.md must exist as a regular file inside the skill directory."""
    manifest = SKILL_DIR / "SKILL.md"
    assert manifest.is_file()
|
||||
|
||||
|
||||
def test_description_under_60_chars(frontmatter) -> None:
    """The frontmatter description must be at most 60 characters long."""
    desc = frontmatter["description"]
    over_limit = len(desc) > 60
    assert not over_limit, f"description is {len(desc)} chars (hardline ≤60): {desc!r}"
|
||||
|
||||
|
||||
def test_name_matches_dir(frontmatter) -> None:
    """The frontmatter ``name`` must be the skill's slug."""
    declared_name = frontmatter["name"]
    assert declared_name == "darwinian-evolver"
|
||||
|
||||
|
||||
def test_platforms_excludes_windows(frontmatter) -> None:
    """The skill must list linux and macos as platforms, and must not list windows."""
    # Upstream depends on func_timeout (POSIX signals) and uv subprocess
    # pipelines, so the skill is gated to [linux, macos]. Should a Windows port
    # ever land, change this test to assert ["linux", "macos", "windows"].
    platforms = frontmatter["platforms"]
    assert "windows" not in platforms
    required = {"linux", "macos"}
    assert required.issubset(platforms)
|
||||
|
||||
|
||||
def test_author_credits_contributor(frontmatter) -> None:
    """The frontmatter ``author`` field must mention the contributor "Bihruze"."""
    author = frontmatter["author"]
    credited = "Bihruze" in author
    assert credited, f"author should credit the original contributor: {author!r}"
|
||||
|
||||
|
||||
def test_license_mit(frontmatter) -> None:
    """The frontmatter must declare the MIT license."""
    declared_license = frontmatter["license"]
    assert declared_license == "MIT"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "path",
    [
        "scripts/parrot_openrouter.py",
        "scripts/show_snapshot.py",
        "templates/custom_problem_template.py",
    ],
)
def test_shipped_scripts_parse(path: str) -> None:
    """Each shipped script and template must be syntactically valid Python.

    Args:
        path: Location of the file to check, relative to the skill directory.

    Raises:
        SyntaxError: propagated from ``ast.parse`` when the file is not valid
            Python; pytest reports it as the test failure.
    """
    script = SKILL_DIR / path
    # Hand the parser raw bytes rather than locale-decoded text: it then applies
    # Python's own source-encoding rules (UTF-8 by default, PEP 263 cookie if
    # present). Passing the filename makes a SyntaxError name the broken file.
    ast.parse(script.read_bytes(), filename=str(script))
|
||||
|
||||
|
||||
def test_parrot_script_uses_openrouter() -> None:
    """The parrot driver source must reference the OpenRouter settings.

    This is a text-level check only: the script is read, never imported or run.
    It looks for the API-key env var name, the OpenRouter base URL fragment,
    and the model-override env var name.
    """
    # Explicit UTF-8 so the check does not depend on the runner's locale.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "OPENROUTER_API_KEY" in src, "parrot driver should read OPENROUTER_API_KEY"
    assert "openrouter.ai/api/v1" in src, "parrot driver should target OpenRouter"
    assert "EVOLVER_MODEL" in src, "model should be overridable via EVOLVER_MODEL"
|
||||
|
||||
|
||||
def test_parrot_script_has_error_swallowing() -> None:
    """Provider content-filter / rate-limit must not kill the run — see Pitfall 2.

    Text-level check: the driver source must contain the ``LLM_ERROR`` marker.
    """
    # Explicit UTF-8 so the check does not depend on the runner's locale.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "LLM_ERROR" in src, "_prompt_llm should swallow provider errors and tag them"
|
||||
|
||||
|
||||
def test_skill_calls_out_agpl(frontmatter) -> None:
    """The upstream tool is AGPL-3.0. The skill MUST flag this so users don't
    import it into MIT-licensed code by accident.

    The ``frontmatter`` fixture value is not inspected here; the check runs
    against the full raw text of SKILL.md.
    """
    # Explicit UTF-8 so the check does not depend on the runner's locale.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    assert "AGPL" in src, "SKILL.md must mention upstream AGPL license"
|
||||
|
||||
|
||||
def test_skill_pitfalls_section_present() -> None:
    """SKILL.md must contain the Pitfalls heading and its key phrases.

    Text-level check against the raw file; each missing phrase is reported by
    name in the assertion message.
    """
    # Explicit UTF-8 so the check does not depend on the runner's locale.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    # Pitfalls we discovered during the spike — keep them in sync with reality.
    expected_phrases = (
        "## Pitfalls",
        "Initial organism must be viable",
        "generator",  # loop.run() pitfall
    )
    for phrase in expected_phrases:
        assert phrase in src, f"SKILL.md is missing expected text: {phrase!r}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue