# Patch summary (explanatory text ahead of the first "diff --git" header).
#
# hermes_cli/backup.py
#   * Extends _EXCLUDED_DIRS with Python dependency trees (.venv, venv,
#     site-packages) and tool/build caches (.cache, .tox, .nox, .pytest_cache,
#     .mypy_cache, .ruff_cache) so the backup walk skips them.
#   * Rewords the node_modules comment.
#   * Documents why .archive is deliberately NOT excluded.
#
# tests/hermes_cli/test_backup.py
#   * Adds a parametrized _should_exclude test for the new directory names.
#   * Adds a test that skills/.archive/ is kept.
#   * Adds a test that look-alike file names (venv-notes.md, cache.json) are kept.
#   * Adds a run_backup regression test that checks a plugin .venv and a
#     .cache tree are absent from the archive while skills/config remain.
#
# NOTE(review): this patch was restored from a whitespace-collapsed copy. Line
# structure matches the hunk headers, but the column alignment of inline
# comments and the indentation of some context lines are reconstructed —
# TODO confirm against the original commit, or apply with
# `git apply --ignore-whitespace` if context fails to match.
diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py
index 0064881c43f..770a8de4569 100644
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@@ -34,14 +34,38 @@ logger = logging.getLogger(__name__)
 # ``hermes-agent`` is special-cased to root level only in ``_should_exclude``
 # so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/``
 # are not accidentally excluded.
+#
+# The dependency/cache entries below matter for more than tidiness: without
+# them a single plugin venv, MCP-server install, or pip/uv cache living under
+# HERMES_HOME gets walked file-by-file, ballooning a backup to hundreds of
+# thousands of entries that crawl for hours — the exact "backup stuck for
+# days / 426543 files" symptom users hit. The dependency/test-env names mostly
+# mirror ``agent.skill_utils.EXCLUDED_SKILL_DIRS`` (the project's canonical
+# "regeneratable dir" set); ``.cache`` is an additional backup-only entry, as
+# it names a broad regeneratable cache convention (pip/uv/etc.) that the skill
+# scanner doesn't need to prune but a backup walk does. We deliberately do NOT
+# exclude ``.archive`` here because the curator's ``skills/.archive/`` holds
+# restorable user skills that must survive a backup.
 _EXCLUDED_DIRS = {
     "hermes-agent",     # the codebase repo — re-clone instead
     "__pycache__",      # bytecode caches — regenerated on import
     ".git",             # nested git dirs (profiles shouldn't have these, but safety)
-    "node_modules",     # js deps if website/ somehow leaks in
+    "node_modules",     # js deps — reinstalled on demand
     "backups",          # prior auto-backups — don't nest backups exponentially
     "checkpoints",      # session-local trajectory caches — regenerated per-session,
                         # session-hash-keyed so they don't port to another machine anyway
+    # Python dependency trees (plugin / MCP-server venvs under HERMES_HOME) —
+    # regenerated by reinstalling; never irreplaceable state.
+    ".venv",
+    "venv",
+    "site-packages",
+    # Tool / build caches — all regeneratable.
+    ".cache",
+    ".tox",
+    ".nox",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".ruff_cache",
 }
 
 # File-name suffixes to skip
diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py
index 762af37069c..e768d2a996c 100644
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@@ -153,6 +153,39 @@ class TestShouldExclude:
         assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
         assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
 
+    @pytest.mark.parametrize(
+        "rel",
+        [
+            "plugins/my-plugin/.venv/lib/python3.12/site-packages/x/__init__.py",
+            "plugins/my-plugin/venv/bin/python",
+            "mcp/server/site-packages/pkg/mod.py",
+            ".cache/uv/wheels/abc.whl",
+            "plugins/p/.cache/pip/http/deadbeef",
+            ".tox/py312/log.txt",
+            ".nox/tests/bin/pytest",
+            "plugins/p/.pytest_cache/v/cache/lastfailed",
+            ".mypy_cache/3.12/agent.meta.json",
+            ".ruff_cache/0.4.0/abc",
+        ],
+    )
+    def test_excludes_regeneratable_dependency_and_cache_dirs(self, rel):
+        """Python dep trees and tool caches under HERMES_HOME must be skipped —
+        these are what balloon a backup to hundreds of thousands of files."""
+        from hermes_cli.backup import _should_exclude
+        assert _should_exclude(Path(rel))
+
+    def test_does_not_exclude_curator_archive(self):
+        """skills/.archive/ holds restorable archived skills and MUST survive
+        a backup — it is intentionally NOT in the exclusion set."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/.archive/old-skill/SKILL.md"))
+
+    def test_does_not_exclude_legit_files_resembling_cache_names(self):
+        """Only directory-component matches are excluded; a normal file is kept."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/my-skill/venv-notes.md"))
+        assert not _should_exclude(Path("memories/cache.json"))
+
 # ---------------------------------------------------------------------------
 # Backup tests
 # ---------------------------------------------------------------------------
@@ -272,6 +305,37 @@ class TestBackup:
             agent_files = [n for n in names if "hermes-agent" in n]
             assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
 
+    def test_excludes_dependency_and_cache_trees(self, tmp_path, monkeypatch):
+        """A plugin venv / site-packages / pip cache under HERMES_HOME must be
+        pruned by the walk, while real data (skills, config) is preserved.
+        This is the regression guard for the ballooning-backup bug."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        # Simulate the heavy regeneratable trees that ballooned the backup.
+        venv_pkg = hermes_home / "plugins" / "heavy" / ".venv" / "lib" / "site-packages" / "dep"
+        venv_pkg.mkdir(parents=True)
+        (venv_pkg / "__init__.py").write_text("# dep\n")
+        pip_cache = hermes_home / ".cache" / "uv" / "wheels"
+        pip_cache.mkdir(parents=True)
+        (pip_cache / "abc.whl").write_bytes(b"\x00")
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_zip = tmp_path / "backup.zip"
+        from hermes_cli.backup import run_backup
+        run_backup(Namespace(output=str(out_zip)))
+
+        with zipfile.ZipFile(out_zip, "r") as zf:
+            names = zf.namelist()
+            leaked = [n for n in names if ".venv" in n or "site-packages" in n or ".cache" in n]
+            assert leaked == [], f"regeneratable trees leaked into backup: {leaked}"
+            # Real data still present.
+            assert "skills/my-skill/SKILL.md" in names
+            assert "config.yaml" in names
+
     def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
         """Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
         hermes_home = tmp_path / ".hermes"