mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
Merge pull request #48941 from kshitijk4poor/salvage-48887-backup-exclude-dirs
fix(backup): exclude regeneratable dep/cache dirs so backups don't balloon
This commit is contained in:
commit
db57a1a035
2 changed files with 89 additions and 1 deletion
|
|
@ -34,14 +34,38 @@ logger = logging.getLogger(__name__)
|
|||
# ``hermes-agent`` is special-cased to root level only in ``_should_exclude``
# so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/``
# are not accidentally excluded.
#
# The dependency/cache entries below matter for more than tidiness: without
# them a single plugin venv, MCP-server install, or pip/uv cache living under
# HERMES_HOME gets walked file-by-file, ballooning a backup to hundreds of
# thousands of entries that crawl for hours — the exact "backup stuck for
# days / 426543 files" symptom users hit. The dependency/test-env names mostly
# mirror ``agent.skill_utils.EXCLUDED_SKILL_DIRS`` (the project's canonical
# "regeneratable dir" set); ``.cache`` is an additional backup-only entry, as
# it names a broad regeneratable cache convention (pip/uv/etc.) that the skill
# scanner doesn't need to prune but a backup walk does. We deliberately do NOT
# exclude ``.archive`` here because the curator's ``skills/.archive/`` holds
# restorable user skills that must survive a backup.
|
||||
# Directory names the backup skips. Every entry names something that can be
# regenerated (re-cloned, reinstalled, or rebuilt), so archiving it only adds
# bulk. ``.archive`` is deliberately absent: it holds restorable user skills.
# Each name appears exactly once — a repeated element in a set literal is
# silently collapsed and only obscures what the set really contains.
_EXCLUDED_DIRS = {
    "hermes-agent",   # the codebase repo — re-clone instead
    "__pycache__",    # bytecode caches — regenerated on import
    ".git",           # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",   # js deps — reinstalled on demand
    "backups",        # prior auto-backups — don't nest backups exponentially
    "checkpoints",    # session-local trajectory caches — regenerated per-session,
                      # session-hash-keyed so they don't port to another machine anyway
    # Python dependency trees (plugin / MCP-server venvs under HERMES_HOME) —
    # regenerated by reinstalling; never irreplaceable state.
    ".venv",
    "venv",
    "site-packages",
    # Tool / build caches — all regeneratable.
    ".cache",
    ".tox",
    ".nox",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
}
|
||||
|
||||
# File-name suffixes to skip
|
||||
|
|
|
|||
|
|
@ -153,6 +153,39 @@ class TestShouldExclude:
|
|||
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
|
||||
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
|
||||
|
||||
@pytest.mark.parametrize(
    "rel",
    (
        # Python dependency trees
        "plugins/my-plugin/.venv/lib/python3.12/site-packages/x/__init__.py",
        "plugins/my-plugin/venv/bin/python",
        "mcp/server/site-packages/pkg/mod.py",
        # Tool / build caches
        ".cache/uv/wheels/abc.whl",
        "plugins/p/.cache/pip/http/deadbeef",
        ".tox/py312/log.txt",
        ".nox/tests/bin/pytest",
        "plugins/p/.pytest_cache/v/cache/lastfailed",
        ".mypy_cache/3.12/agent.meta.json",
        ".ruff_cache/0.4.0/abc",
    ),
)
def test_excludes_regeneratable_dependency_and_cache_dirs(self, rel):
    """Paths inside a dependency tree or tool cache must be excluded.

    Each parametrized ``rel`` is a relative path that passes through one of
    the regeneratable directory names; these trees are what balloon a backup
    to hundreds of thousands of files.
    """
    from hermes_cli.backup import _should_exclude

    candidate = Path(rel)
    assert _should_exclude(candidate)
|
||||
|
||||
def test_does_not_exclude_curator_archive(self):
    """Archived skills under ``skills/.archive/`` must be kept.

    That directory holds restorable archived skills, so it is intentionally
    NOT part of the exclusion set and has to survive a backup.
    """
    from hermes_cli.backup import _should_exclude

    archived_skill = Path("skills/.archive/old-skill/SKILL.md")
    assert not _should_exclude(archived_skill)
|
||||
|
||||
def test_does_not_exclude_legit_files_resembling_cache_names(self):
    """Ordinary files whose names merely resemble an excluded name are kept.

    Only directory-component matches are excluded; a normal file is not.
    """
    from hermes_cli.backup import _should_exclude

    # Checked in order, so the first unexpected exclusion fails the test.
    for kept in ("skills/my-skill/venv-notes.md", "memories/cache.json"):
        assert not _should_exclude(Path(kept))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backup tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -272,6 +305,37 @@ class TestBackup:
|
|||
agent_files = [n for n in names if "hermes-agent" in n]
|
||||
assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
|
||||
|
||||
def test_excludes_dependency_and_cache_trees(self, tmp_path, monkeypatch):
    """A plugin venv / site-packages / pip cache under HERMES_HOME must be
    pruned by the walk, while real data (skills, config) is preserved.

    This is the regression guard for the ballooning-backup bug.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    _make_hermes_tree(hermes_home)

    # Simulate the heavy regeneratable trees that ballooned the backup.
    venv_pkg = hermes_home / "plugins" / "heavy" / ".venv" / "lib" / "site-packages" / "dep"
    venv_pkg.mkdir(parents=True)
    (venv_pkg / "__init__.py").write_text("# dep\n")
    pip_cache = hermes_home / ".cache" / "uv" / "wheels"
    pip_cache.mkdir(parents=True)
    (pip_cache / "abc.whl").write_bytes(b"\x00")

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    out_zip = tmp_path / "backup.zip"
    from hermes_cli.backup import run_backup
    run_backup(Namespace(output=str(out_zip)))

    with zipfile.ZipFile(out_zip, "r") as zf:
        names = zf.namelist()

    # Compare whole path components rather than raw substrings: a substring
    # test would also flag unrelated entries such as ``foo.cache.json``,
    # whereas a leak is specifically an archive member that sits inside one
    # of the pruned directories.
    pruned_dirs = {".venv", "site-packages", ".cache"}
    leaked = [n for n in names if pruned_dirs.intersection(Path(n).parts)]
    assert leaked == [], f"regeneratable trees leaked into backup: {leaked}"

    # Real data still present.
    assert "skills/my-skill/SKILL.md" in names
    assert "config.yaml" in names
|
||||
|
||||
def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
|
||||
"""Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue