mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(profiles): exclude infrastructure artifacts when cloning with --clone-all
When the source profile is the default (~/.hermes), shutil.copytree() was copying multi-GB infrastructure alongside the ~40 MB of actual profile data:

- hermes-agent/ (repo checkout + 3 GB venv)
- .worktrees/
- profiles/ (sibling profiles — recursive!)
- bin/ (installed binaries)
- node_modules/ (hundreds of MB)

Add _CLONE_ALL_DEFAULT_EXCLUDE_ROOT frozenset with these five entries and pass an ignore callback to copytree(). Exclusions are gated on the source actually being the default profile (is_default_source) so named-profile sources are never affected.

Also exclude at any depth: __pycache__/, *.pyc, *.pyo, *.sock, *.tmp.

Profile data (config.yaml, .env, auth.json, state.db, sessions/, skills/, logs/) is preserved intact — clone-all means 'complete snapshot minus infrastructure'.

Mirrors the approach already used by _default_export_ignore() and _DEFAULT_EXPORT_EXCLUDE_ROOT (the export-side exclusion set which is broader because it produces a portable archive, not a live clone).

Co-authored-by: MustafaKara7 <karamusti912@gmail.com>
Co-authored-by: fahdad <30740087+fahdad@users.noreply.github.com>

Fixes #5022
Based on PRs #5025, #5026, and #21728
This commit is contained in:
parent
93e25ceb13
commit
f7e514d4ad
2 changed files with 123 additions and 14 deletions
|
|
@ -64,13 +64,39 @@ _CLONE_SUBDIR_FILES = [
|
|||
"memories/USER.md",
|
||||
]
|
||||
|
||||
# Runtime files stripped after --clone-all (shouldn't carry over)
|
||||
_CLONE_ALL_STRIP = [
|
||||
# Runtime files stripped after --clone-all (shouldn't carry over).
|
||||
# Kept as a post-copy step rather than in the ignore filter because they
|
||||
# are created dynamically during normal use and may be absent at copy time.
|
||||
_CLONE_ALL_STRIP: list[str] = [
|
||||
"gateway.pid",
|
||||
"gateway_state.json",
|
||||
"processes.json",
|
||||
]
|
||||
|
||||
# Infrastructure artifacts excluded from --clone-all when the source is the
|
||||
# default profile (``~/.hermes``). Named profiles are not expected to hold
|
||||
# these directories at root, so a same-named entry there is user data; the
|
||||
# exclusion is gated on the default profile to avoid silently dropping it.
|
||||
#
|
||||
# Rationale per item:
|
||||
# hermes-agent — git repo checkout (~84 MB source + ~3 GB venv)
|
||||
# .worktrees — git worktrees
|
||||
# profiles — sibling named profiles (recursive copy never intended)
|
||||
# bin — installed binaries (tirith etc., ~10 MB) shared per-host
|
||||
# node_modules — npm packages (hundreds of MB)
|
||||
#
|
||||
# See ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` below for the broader export-side
|
||||
# exclusion list (export drops state.db / logs / caches too because the
|
||||
# archive is a portable snapshot; clone-all keeps those because the cloned
|
||||
# profile is meant to keep working immediately).
|
||||
_CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
|
||||
"hermes-agent",
|
||||
".worktrees",
|
||||
"profiles",
|
||||
"bin",
|
||||
"node_modules",
|
||||
})
|
||||
|
||||
# Marker file written by `hermes profile create --no-skills`. When present in
|
||||
# a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
|
||||
# update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
|
||||
|
|
@ -89,23 +115,48 @@ def has_bundled_skills_opt_out(profile_dir: Path) -> bool:
|
|||
|
||||
|
||||
def _clone_all_copytree_ignore(source_dir: Path):
|
||||
"""Ignore ``profiles/`` at the root of *source_dir* only.
|
||||
"""Exclude infrastructure artifacts when cloning a profile via --clone-all.
|
||||
|
||||
``~/.hermes`` contains ``profiles/<name>/`` for sibling named profiles.
|
||||
``shutil.copytree`` would otherwise duplicate that entire tree inside the
|
||||
new profile (recursive ``.../profiles/.../profiles/...``). Export already
|
||||
excludes ``profiles`` via ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` — match that
|
||||
behavior for ``--clone-all``.
|
||||
Two categories:
|
||||
1. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
|
||||
Hermes infrastructure directories that only the default profile
|
||||
(``~/.hermes``) ever contains. Gated on ``source_dir`` actually
|
||||
being the default profile so a named-profile source never has its
|
||||
own data silently dropped.
|
||||
2. Universal exclusions at any depth — Python bytecode caches that
|
||||
are stale or regenerable (``__pycache__``, ``*.pyc``, ``*.pyo``)
|
||||
and runtime sockets / temp files (``*.sock``, ``*.tmp``).
|
||||
|
||||
The export-side ignore (``_default_export_ignore``) uses the same
|
||||
two-tier pattern with the broader ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` set
|
||||
because the export archive is a portable snapshot rather than a live
|
||||
clone.
|
||||
"""
|
||||
source_resolved = source_dir.resolve()
|
||||
is_default_source = source_resolved == _get_default_hermes_home().resolve()
|
||||
|
||||
def _ignore(directory: str, names: List[str]) -> List[str]:
|
||||
try:
|
||||
if Path(directory).resolve() == source_resolved:
|
||||
return [n for n in names if n == "profiles"]
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
return []
|
||||
ignored: list[str] = []
|
||||
for entry in names:
|
||||
# Universal exclusions at any depth.
|
||||
if (
|
||||
entry == "__pycache__"
|
||||
or entry.endswith((".pyc", ".pyo", ".sock", ".tmp"))
|
||||
):
|
||||
ignored.append(entry)
|
||||
continue
|
||||
# Root-level exclusions only apply when cloning the default profile.
|
||||
if is_default_source:
|
||||
try:
|
||||
if Path(directory).resolve() == source_resolved:
|
||||
if entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT:
|
||||
ignored.append(entry)
|
||||
except (OSError, ValueError):
|
||||
# ``resolve()`` can fail on unusual FS layouts (broken
|
||||
# symlinks, missing parents). Fail open — better to
|
||||
# over-copy than silently drop user data.
|
||||
pass
|
||||
return ignored
|
||||
|
||||
return _ignore
|
||||
|
||||
|
|
|
|||
|
|
@ -244,6 +244,64 @@ class TestCreateProfile:
|
|||
assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
|
||||
assert not (profile_dir / "profiles").exists()
|
||||
|
||||
def test_clone_all_excludes_default_infrastructure(self, profile_env):
|
||||
"""--clone-all from default profile excludes hermes-agent, .worktrees,
|
||||
profiles, bin, node_modules at root, plus __pycache__/*.pyc/*.pyo/*.sock/*.tmp
|
||||
at any depth. Profile data (config, env, skills, sessions, logs,
|
||||
state.db) must be preserved — clone-all means "complete snapshot
|
||||
minus infrastructure."
|
||||
"""
|
||||
tmp_path = profile_env
|
||||
default_home = tmp_path / ".hermes"
|
||||
# Simulate infrastructure dirs that only the default profile has
|
||||
(default_home / "hermes-agent" / ".git").mkdir(parents=True)
|
||||
(default_home / "hermes-agent" / "venv" / "bin").mkdir(parents=True)
|
||||
(default_home / "hermes-agent" / "README.md").write_text("repo")
|
||||
(default_home / ".worktrees" / "some-tree").mkdir(parents=True)
|
||||
(default_home / "profiles" / "other").mkdir(parents=True)
|
||||
(default_home / "profiles" / "other" / "config.yaml").write_text("x")
|
||||
(default_home / "bin").mkdir(exist_ok=True)
|
||||
(default_home / "bin" / "tool").write_text("binary")
|
||||
(default_home / "node_modules" / ".package-lock.json").mkdir(parents=True)
|
||||
# Bytecode + temp files at nested depth (universal exclusion)
|
||||
(default_home / "skills" / "my-skill" / "__pycache__").mkdir(parents=True)
|
||||
(default_home / "skills" / "my-skill" / "__pycache__" / "module.cpython-311.pyc").write_text("stale")
|
||||
(default_home / "skills" / "my-skill" / "module.pyc").write_text("stale")
|
||||
(default_home / "skills" / "my-skill" / "module.pyo").write_text("stale")
|
||||
(default_home / "data.sock").write_text("socket")
|
||||
(default_home / "data.tmp").write_text("tmp")
|
||||
# Profile data that SHOULD be copied
|
||||
(default_home / "skills" / "my-skill").mkdir(parents=True, exist_ok=True)
|
||||
(default_home / "skills" / "my-skill" / "SKILL.md").write_text("skill")
|
||||
(default_home / "config.yaml").write_text("model: gpt-4")
|
||||
(default_home / ".env").write_text("KEY=val")
|
||||
(default_home / "state.db").write_text("sessions-data")
|
||||
(default_home / "sessions").mkdir(exist_ok=True)
|
||||
(default_home / "logs").mkdir(exist_ok=True)
|
||||
(default_home / "logs" / "gateway.log").write_text("log")
|
||||
|
||||
profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
|
||||
|
||||
# Infrastructure must be excluded
|
||||
assert not (profile_dir / "hermes-agent").exists()
|
||||
assert not (profile_dir / ".worktrees").exists()
|
||||
assert not (profile_dir / "profiles").exists()
|
||||
assert not (profile_dir / "bin").exists()
|
||||
assert not (profile_dir / "node_modules").exists()
|
||||
# Universal exclusions at any depth
|
||||
assert not (profile_dir / "data.sock").exists()
|
||||
assert not (profile_dir / "data.tmp").exists()
|
||||
assert not (profile_dir / "skills" / "my-skill" / "__pycache__").exists()
|
||||
assert not (profile_dir / "skills" / "my-skill" / "module.pyc").exists()
|
||||
assert not (profile_dir / "skills" / "my-skill" / "module.pyo").exists()
|
||||
# All profile data must be present
|
||||
assert (profile_dir / "skills" / "my-skill" / "SKILL.md").read_text() == "skill"
|
||||
assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
|
||||
assert (profile_dir / ".env").read_text() == "KEY=val"
|
||||
assert (profile_dir / "state.db").read_text() == "sessions-data"
|
||||
assert (profile_dir / "sessions").exists()
|
||||
assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
|
||||
|
||||
def test_clone_config_missing_files_skipped(self, profile_env):
|
||||
"""Clone config gracefully skips files that don't exist in source."""
|
||||
profile_dir = create_profile("coder", clone_config=True, no_alias=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue