mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
Thin wrapper around Imbue's darwinian_evolver (AGPL-3.0, subprocess-only). Ships a working OpenRouter driver (parrot_openrouter.py), a snapshot inspector (show_snapshot.py), and a custom-problem template. SKILL.md has 58-char description, Pitfalls sourced from actually running the loop: non-viable seed trap, Azure content filter killing runs, loop.run() being a generator, nested-pickle snapshots, and aggressive default concurrency. Salvaged from #12719 by @Bihruze — original PR shipped 12,289 LOC across 61 files (29 Python modules, FastAPI dashboard, VS Code extension, benchmark hub, marketplace, etc.) which was far beyond the scope of the underlying issue (#336). This version stays at the ~700-LOC scope that issue actually asked for. Authorship of the original effort credited via AUTHOR_MAP entry and the SKILL.md author field. Verified end-to-end: seed 'Say {{ phrase }}' (score 0.000) evolved into 'Please repeat the following phrase exactly as it is, without any modifications or additional formatting: {{ phrase }}' (score 0.750) across 3 iterations on gpt-4o-mini via OpenRouter. Co-authored-by: Bihruze <98262967+Bihruze@users.noreply.github.com>
102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
"""
|
|
Smoke tests for the darwinian-evolver optional skill.
|
|
|
|
We can't actually run the evolution loop in CI (it needs network + a paid LLM),
|
|
so these tests verify:
|
|
- SKILL.md frontmatter conforms to the hardline format
|
|
- shipped scripts parse as valid Python
|
|
- the scripts reference the right env var / module paths
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import ast
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
# Directory of the skill under test. parents[2] is two levels above the
# directory holding this file (presumably the repository root — confirm).
SKILL_DIR = Path(__file__).resolve().parents[2].joinpath(
    "optional-skills", "research", "darwinian-evolver"
)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def frontmatter() -> dict:
    """Load the YAML frontmatter block of ``SKILL.md`` once per test module.

    Returns:
        The mapping parsed from the text between the leading ``---`` line
        and the next ``---`` line.

    Raises:
        AssertionError: if the file does not start with a frontmatter block,
            or if the block does not parse to a mapping.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # preferred encoding of the machine running the tests.
    text = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    # No re.MULTILINE: "^" anchors to the very start of the file. DOTALL lets
    # the lazy group span the multi-line YAML body up to the closing fence.
    m = re.search(r"^---\n(.*?)\n---", text, re.DOTALL)
    assert m, "SKILL.md missing YAML frontmatter"
    data = yaml.safe_load(m.group(1))
    # safe_load yields None (or a scalar/list) for empty or non-mapping YAML;
    # fail here with a clear message instead of a TypeError in every test.
    assert isinstance(data, dict), "SKILL.md frontmatter must be a YAML mapping"
    return data
|
|
|
|
|
|
def test_skill_dir_exists() -> None:
    """The skill directory referenced by ``SKILL_DIR`` must exist on disk."""
    found = SKILL_DIR.is_dir()
    assert found, f"missing skill dir: {SKILL_DIR}"
|
|
|
|
|
|
def test_skill_md_present() -> None:
    """``SKILL.md`` must be a regular file directly inside the skill directory."""
    skill_md = SKILL_DIR / "SKILL.md"
    assert skill_md.is_file()
|
|
|
|
|
|
def test_description_under_60_chars(frontmatter) -> None:
|
|
desc = frontmatter["description"]
|
|
assert len(desc) <= 60, f"description is {len(desc)} chars (hardline ≤60): {desc!r}"
|
|
|
|
|
|
def test_name_matches_dir(frontmatter) -> None:
|
|
assert frontmatter["name"] == "darwinian-evolver"
|
|
|
|
|
|
def test_platforms_excludes_windows(frontmatter) -> None:
|
|
# Upstream uses func_timeout (POSIX signals) and uv subprocess pipelines; the
|
|
# skill is gated [linux, macos]. If we ever port to Windows, update this test
|
|
# to assert ["linux", "macos", "windows"].
|
|
assert "windows" not in frontmatter["platforms"]
|
|
assert set(frontmatter["platforms"]) >= {"linux", "macos"}
|
|
|
|
|
|
def test_author_credits_contributor(frontmatter) -> None:
|
|
author = frontmatter["author"]
|
|
assert "Bihruze" in author, f"author should credit the original contributor: {author!r}"
|
|
|
|
|
|
def test_license_mit(frontmatter) -> None:
|
|
assert frontmatter["license"] == "MIT"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "path",
    [
        "scripts/parrot_openrouter.py",
        "scripts/show_snapshot.py",
        "templates/custom_problem_template.py",
    ],
)
def test_shipped_scripts_parse(path: str) -> None:
    """Check that a shipped Python file is syntactically valid.

    The file is only parsed, never imported or executed.

    Args:
        path: Location of the file relative to ``SKILL_DIR``.

    Raises:
        SyntaxError: if the file is not valid Python.
    """
    script = SKILL_DIR / path
    # Decode explicitly as UTF-8 rather than relying on the machine's
    # preferred encoding.
    source = script.read_text(encoding="utf-8")
    # Passing the filename makes a SyntaxError point at the offending file
    # instead of the default "<unknown>".
    ast.parse(source, filename=str(script))
|
|
|
|
|
|
def test_parrot_script_uses_openrouter() -> None:
    """Check that the parrot driver's source text mentions its OpenRouter wiring.

    This is a plain substring check on the script text; the script is not
    imported or run.
    """
    # Decode explicitly as UTF-8 so the check does not depend on the
    # preferred encoding of the machine running the tests.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "OPENROUTER_API_KEY" in src, "parrot driver should read OPENROUTER_API_KEY"
    assert "openrouter.ai/api/v1" in src, "parrot driver should target OpenRouter"
    assert "EVOLVER_MODEL" in src, "model should be overridable via EVOLVER_MODEL"
|
|
|
|
|
|
def test_parrot_script_has_error_swallowing() -> None:
    """Provider content-filter / rate-limit must not kill the run — see Pitfall 2.

    Verified only by checking that the script text contains the ``LLM_ERROR``
    marker; the script is not imported or run.
    """
    # Decode explicitly as UTF-8 so the check does not depend on the
    # preferred encoding of the machine running the tests.
    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text(encoding="utf-8")
    assert "LLM_ERROR" in src, "_prompt_llm should swallow provider errors and tag them"
|
|
|
|
|
|
def test_skill_calls_out_agpl(frontmatter) -> None:
    """The upstream tool is AGPL-3.0. The skill MUST flag this so users don't
    import it into MIT-licensed code by accident.

    The ``frontmatter`` fixture is requested but its value is not read here;
    the check is a substring search over the whole of ``SKILL.md``.
    """
    # Decode explicitly as UTF-8 so the check does not depend on the
    # preferred encoding of the machine running the tests.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    assert "AGPL" in src, "SKILL.md must mention upstream AGPL license"
|
|
|
|
|
|
def test_skill_pitfalls_section_present() -> None:
    """``SKILL.md`` must keep its Pitfalls section and two known pitfall phrases."""
    # Decode explicitly as UTF-8 so the check does not depend on the
    # preferred encoding of the machine running the tests.
    src = (SKILL_DIR / "SKILL.md").read_text(encoding="utf-8")
    assert "## Pitfalls" in src
    # Pitfalls we discovered during the spike — keep them in sync with reality.
    assert "Initial organism must be viable" in src
    assert "generator" in src  # loop.run() pitfall
|