fix(profiles): exclude infrastructure artifacts when cloning with --clone-all

When the source profile is the default (~/.hermes), shutil.copytree()
was copying multi-GB infrastructure alongside the ~40 MB of actual
profile data. The infrastructure being copied was: hermes-agent/ (repo
checkout + 3 GB venv), .worktrees/, profiles/ (sibling profiles —
recursive!), bin/ (installed binaries), and node_modules/ (hundreds of MB).

Add _CLONE_ALL_DEFAULT_EXCLUDE_ROOT frozenset with these five entries
and pass an ignore callback to copytree().  Exclusions are gated on
the source actually being the default profile (is_default_source) so
named-profile sources are never affected.

Also exclude at any depth: __pycache__/, *.pyc, *.pyo, *.sock, *.tmp.
Profile data (config.yaml, .env, auth.json, state.db, sessions/,
skills/, logs/) is preserved intact — clone-all means 'complete
snapshot minus infrastructure'.

Mirrors the approach already used by _default_export_ignore() and
_DEFAULT_EXPORT_EXCLUDE_ROOT (the export-side exclusion set which is
broader because it produces a portable archive, not a live clone).

Co-authored-by: MustafaKara7 <karamusti912@gmail.com>
Co-authored-by: fahdad <30740087+fahdad@users.noreply.github.com>
Fixes #5022
Based on PRs #5025, #5026, and #21728
This commit is contained in:
donrhmexe 2026-05-09 15:39:27 +05:30 committed by kshitij
parent 93e25ceb13
commit f7e514d4ad
2 changed files with 123 additions and 14 deletions

View file

@ -244,6 +244,64 @@ class TestCreateProfile:
assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
assert not (profile_dir / "profiles").exists()
def test_clone_all_excludes_default_infrastructure(self, profile_env):
    """Cloning the default profile with clone_all skips infrastructure.

    Populates ``.hermes`` under ``profile_env`` with three groups of
    entries: root-level infrastructure (hermes-agent, .worktrees,
    profiles, bin, node_modules), bytecode/socket/temp artifacts
    (__pycache__, *.pyc, *.pyo, *.sock, *.tmp), and ordinary profile
    data.  After ``create_profile("cloned", clone_all=True,
    no_alias=True)`` the first two groups must be missing from the
    clone, while the profile data (config, env, skills, sessions, logs,
    state.db) must be present — clone-all means "complete snapshot
    minus infrastructure."
    """
    source_home = profile_env / ".hermes"
    agent_dir = source_home / "hermes-agent"
    skill_dir = source_home / "skills" / "my-skill"
    pycache_dir = skill_dir / "__pycache__"

    # --- Infrastructure that only the default profile carries ---------
    for infra_dir in (
        agent_dir / ".git",
        agent_dir / "venv" / "bin",
        source_home / ".worktrees" / "some-tree",
        source_home / "profiles" / "other",
        source_home / "node_modules" / ".package-lock.json",
    ):
        infra_dir.mkdir(parents=True)
    (source_home / "bin").mkdir(exist_ok=True)
    for infra_file, text in (
        (agent_dir / "README.md", "repo"),
        (source_home / "profiles" / "other" / "config.yaml", "x"),
        (source_home / "bin" / "tool", "binary"),
    ):
        infra_file.write_text(text)

    # --- Bytecode and temp artifacts, some nested below the root ------
    pycache_dir.mkdir(parents=True)
    for junk_file, text in (
        (pycache_dir / "module.cpython-311.pyc", "stale"),
        (skill_dir / "module.pyc", "stale"),
        (skill_dir / "module.pyo", "stale"),
        (source_home / "data.sock", "socket"),
        (source_home / "data.tmp", "tmp"),
    ):
        junk_file.write_text(text)

    # --- Genuine profile data that the clone has to keep --------------
    skill_dir.mkdir(parents=True, exist_ok=True)
    for data_file, text in (
        (skill_dir / "SKILL.md", "skill"),
        (source_home / "config.yaml", "model: gpt-4"),
        (source_home / ".env", "KEY=val"),
        (source_home / "state.db", "sessions-data"),
    ):
        data_file.write_text(text)
    for data_dir in ("sessions", "logs"):
        (source_home / data_dir).mkdir(exist_ok=True)
    (source_home / "logs" / "gateway.log").write_text("log")

    profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
    cloned_skill = profile_dir / "skills" / "my-skill"

    # Root-level infrastructure must not have been copied.
    for infra_name in (
        "hermes-agent",
        ".worktrees",
        "profiles",
        "bin",
        "node_modules",
    ):
        assert not (profile_dir / infra_name).exists()

    # Artifacts excluded regardless of how deep they sit in the tree.
    for junk_path in (
        profile_dir / "data.sock",
        profile_dir / "data.tmp",
        cloned_skill / "__pycache__",
        cloned_skill / "module.pyc",
        cloned_skill / "module.pyo",
    ):
        assert not junk_path.exists()

    # Every piece of profile data must have survived the clone.
    for kept_file, expected in (
        (cloned_skill / "SKILL.md", "skill"),
        (profile_dir / "config.yaml", "model: gpt-4"),
        (profile_dir / ".env", "KEY=val"),
        (profile_dir / "state.db", "sessions-data"),
    ):
        assert kept_file.read_text() == expected
    assert (profile_dir / "sessions").exists()
    assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
def test_clone_config_missing_files_skipped(self, profile_env):
"""Clone config gracefully skips files that don't exist in source."""
profile_dir = create_profile("coder", clone_config=True, no_alias=True)