mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-24 05:41:40 +00:00
fix(profiles): exclude infrastructure artifacts when cloning with --clone-all
When the source profile is the default (~/.hermes), shutil.copytree() was copying multi-GB infrastructure alongside the ~40 MB of actual profile data:

- hermes-agent/ (repo checkout + 3 GB venv)
- .worktrees/
- profiles/ (sibling profiles — recursive!)
- bin/ (installed binaries)
- node_modules/ (hundreds of MB)

Add a _CLONE_ALL_DEFAULT_EXCLUDE_ROOT frozenset with these five entries and pass an ignore callback to copytree(). These root-level exclusions are gated on the source actually being the default profile (is_default_source), so a named-profile source never loses a root directory that happens to share one of these names.

Also excluded at any depth, for every source: __pycache__/, *.pyc, *.pyo, *.sock, *.tmp.

Profile data (config.yaml, .env, auth.json, state.db, sessions/, skills/, logs/) is preserved intact — clone-all means 'complete snapshot minus infrastructure'.

Mirrors the approach already used by _default_export_ignore() and _DEFAULT_EXPORT_EXCLUDE_ROOT (the export-side exclusion set, which is broader because it produces a portable archive, not a live clone).

Co-authored-by: MustafaKara7 <karamusti912@gmail.com>
Co-authored-by: fahdad <30740087+fahdad@users.noreply.github.com>

Fixes #5022
Based on PRs #5025, #5026, and #21728
This commit is contained in:
parent
93e25ceb13
commit
f7e514d4ad
2 changed files with 123 additions and 14 deletions
|
|
@ -64,13 +64,39 @@ _CLONE_SUBDIR_FILES = [
|
||||||
"memories/USER.md",
|
"memories/USER.md",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Runtime files stripped after --clone-all (shouldn't carry over)
|
# Runtime files stripped after --clone-all (shouldn't carry over).
|
||||||
_CLONE_ALL_STRIP = [
|
# Kept as a post-copy step rather than in the ignore filter because they
|
||||||
|
# are created dynamically during normal use and may be absent at copy time.
|
||||||
|
_CLONE_ALL_STRIP: list[str] = [
|
||||||
"gateway.pid",
|
"gateway.pid",
|
||||||
"gateway_state.json",
|
"gateway_state.json",
|
||||||
"processes.json",
|
"processes.json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Infrastructure artifacts excluded from --clone-all when the source is the
|
||||||
|
# default profile (``~/.hermes``). Named profiles are not expected to hold
|
||||||
|
# Hermes infrastructure at their root, so an entry with one of these names
|
||||||
|
# there is user data; the exclusion is gated so it is never silently dropped.
|
||||||
|
#
|
||||||
|
# Rationale per item:
|
||||||
|
# hermes-agent — git repo checkout (~84 MB source + ~3 GB venv)
|
||||||
|
# .worktrees — git worktrees
|
||||||
|
# profiles — sibling named profiles (recursive copy never intended)
|
||||||
|
# bin — installed binaries (tirith etc., ~10 MB) shared per-host
|
||||||
|
# node_modules — npm packages (hundreds of MB)
|
||||||
|
#
|
||||||
|
# See ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` below for the broader export-side
|
||||||
|
# exclusion list (export drops state.db / logs / caches too because the
|
||||||
|
# archive is a portable snapshot; clone-all keeps those because the cloned
|
||||||
|
# profile is meant to keep working immediately).
|
||||||
|
_CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
|
||||||
|
"hermes-agent",
|
||||||
|
".worktrees",
|
||||||
|
"profiles",
|
||||||
|
"bin",
|
||||||
|
"node_modules",
|
||||||
|
})
|
||||||
|
|
||||||
# Marker file written by `hermes profile create --no-skills`. When present in
|
# Marker file written by `hermes profile create --no-skills`. When present in
|
||||||
# a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
|
# a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
|
||||||
# update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
|
# update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
|
||||||
|
|
@ -89,23 +115,48 @@ def has_bundled_skills_opt_out(profile_dir: Path) -> bool:
|
||||||
|
|
||||||
|
|
||||||
def _clone_all_copytree_ignore(source_dir: Path):
|
def _clone_all_copytree_ignore(source_dir: Path):
|
||||||
"""Ignore ``profiles/`` at the root of *source_dir* only.
|
"""Exclude infrastructure artifacts when cloning a profile via --clone-all.
|
||||||
|
|
||||||
``~/.hermes`` contains ``profiles/<name>/`` for sibling named profiles.
|
Two categories:
|
||||||
``shutil.copytree`` would otherwise duplicate that entire tree inside the
|
1. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
|
||||||
new profile (recursive ``.../profiles/.../profiles/...``). Export already
|
Hermes infrastructure directories that only the default profile
|
||||||
excludes ``profiles`` via ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` — match that
|
(``~/.hermes``) ever contains. Gated on ``source_dir`` actually
|
||||||
behavior for ``--clone-all``.
|
being the default profile so a named-profile source never has its
|
||||||
|
own data silently dropped.
|
||||||
|
2. Universal exclusions at any depth — Python bytecode caches that
|
||||||
|
are stale or regenerable (``__pycache__``, ``*.pyc``, ``*.pyo``)
|
||||||
|
and runtime sockets / temp files (``*.sock``, ``*.tmp``).
|
||||||
|
|
||||||
|
The export-side ignore (``_default_export_ignore``) uses the same
|
||||||
|
two-tier pattern with the broader ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` set
|
||||||
|
because the export archive is a portable snapshot rather than a live
|
||||||
|
clone.
|
||||||
"""
|
"""
|
||||||
source_resolved = source_dir.resolve()
|
source_resolved = source_dir.resolve()
|
||||||
|
is_default_source = source_resolved == _get_default_hermes_home().resolve()
|
||||||
|
|
||||||
def _ignore(directory: str, names: List[str]) -> List[str]:
|
def _ignore(directory: str, names: List[str]) -> List[str]:
|
||||||
try:
|
ignored: list[str] = []
|
||||||
if Path(directory).resolve() == source_resolved:
|
for entry in names:
|
||||||
return [n for n in names if n == "profiles"]
|
# Universal exclusions at any depth.
|
||||||
except (OSError, ValueError):
|
if (
|
||||||
pass
|
entry == "__pycache__"
|
||||||
return []
|
or entry.endswith((".pyc", ".pyo", ".sock", ".tmp"))
|
||||||
|
):
|
||||||
|
ignored.append(entry)
|
||||||
|
continue
|
||||||
|
# Root-level exclusions only apply when cloning the default profile.
|
||||||
|
if is_default_source:
|
||||||
|
try:
|
||||||
|
if Path(directory).resolve() == source_resolved:
|
||||||
|
if entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT:
|
||||||
|
ignored.append(entry)
|
||||||
|
except (OSError, ValueError):
|
||||||
|
# ``resolve()`` can fail on unusual FS layouts (broken
|
||||||
|
# symlinks, missing parents). Fail open — better to
|
||||||
|
# over-copy than silently drop user data.
|
||||||
|
pass
|
||||||
|
return ignored
|
||||||
|
|
||||||
return _ignore
|
return _ignore
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -244,6 +244,64 @@ class TestCreateProfile:
|
||||||
assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
|
assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
|
||||||
assert not (profile_dir / "profiles").exists()
|
assert not (profile_dir / "profiles").exists()
|
||||||
|
|
||||||
|
def test_clone_all_excludes_default_infrastructure(self, profile_env):
|
||||||
|
"""--clone-all from default profile excludes hermes-agent, .worktrees,
|
||||||
|
profiles, bin, node_modules at root, plus __pycache__/*.pyc/*.pyo/*.sock/*.tmp
|
||||||
|
at any depth. Profile data (config, env, skills, sessions, logs,
|
||||||
|
state.db) must be preserved — clone-all means "complete snapshot
|
||||||
|
minus infrastructure."
|
||||||
|
"""
|
||||||
|
tmp_path = profile_env
|
||||||
|
default_home = tmp_path / ".hermes"
|
||||||
|
# Simulate infrastructure dirs that only the default profile has
|
||||||
|
(default_home / "hermes-agent" / ".git").mkdir(parents=True)
|
||||||
|
(default_home / "hermes-agent" / "venv" / "bin").mkdir(parents=True)
|
||||||
|
(default_home / "hermes-agent" / "README.md").write_text("repo")
|
||||||
|
(default_home / ".worktrees" / "some-tree").mkdir(parents=True)
|
||||||
|
(default_home / "profiles" / "other").mkdir(parents=True)
|
||||||
|
(default_home / "profiles" / "other" / "config.yaml").write_text("x")
|
||||||
|
(default_home / "bin").mkdir(exist_ok=True)
|
||||||
|
(default_home / "bin" / "tool").write_text("binary")
|
||||||
|
(default_home / "node_modules" / ".package-lock.json").mkdir(parents=True)
|
||||||
|
# Bytecode + temp files at nested depth (universal exclusion)
|
||||||
|
(default_home / "skills" / "my-skill" / "__pycache__").mkdir(parents=True)
|
||||||
|
(default_home / "skills" / "my-skill" / "__pycache__" / "module.cpython-311.pyc").write_text("stale")
|
||||||
|
(default_home / "skills" / "my-skill" / "module.pyc").write_text("stale")
|
||||||
|
(default_home / "skills" / "my-skill" / "module.pyo").write_text("stale")
|
||||||
|
(default_home / "data.sock").write_text("socket")
|
||||||
|
(default_home / "data.tmp").write_text("tmp")
|
||||||
|
# Profile data that SHOULD be copied
|
||||||
|
(default_home / "skills" / "my-skill").mkdir(parents=True, exist_ok=True)
|
||||||
|
(default_home / "skills" / "my-skill" / "SKILL.md").write_text("skill")
|
||||||
|
(default_home / "config.yaml").write_text("model: gpt-4")
|
||||||
|
(default_home / ".env").write_text("KEY=val")
|
||||||
|
(default_home / "state.db").write_text("sessions-data")
|
||||||
|
(default_home / "sessions").mkdir(exist_ok=True)
|
||||||
|
(default_home / "logs").mkdir(exist_ok=True)
|
||||||
|
(default_home / "logs" / "gateway.log").write_text("log")
|
||||||
|
|
||||||
|
profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
|
||||||
|
|
||||||
|
# Infrastructure must be excluded
|
||||||
|
assert not (profile_dir / "hermes-agent").exists()
|
||||||
|
assert not (profile_dir / ".worktrees").exists()
|
||||||
|
assert not (profile_dir / "profiles").exists()
|
||||||
|
assert not (profile_dir / "bin").exists()
|
||||||
|
assert not (profile_dir / "node_modules").exists()
|
||||||
|
# Universal exclusions at any depth
|
||||||
|
assert not (profile_dir / "data.sock").exists()
|
||||||
|
assert not (profile_dir / "data.tmp").exists()
|
||||||
|
assert not (profile_dir / "skills" / "my-skill" / "__pycache__").exists()
|
||||||
|
assert not (profile_dir / "skills" / "my-skill" / "module.pyc").exists()
|
||||||
|
assert not (profile_dir / "skills" / "my-skill" / "module.pyo").exists()
|
||||||
|
# All profile data must be present
|
||||||
|
assert (profile_dir / "skills" / "my-skill" / "SKILL.md").read_text() == "skill"
|
||||||
|
assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
|
||||||
|
assert (profile_dir / ".env").read_text() == "KEY=val"
|
||||||
|
assert (profile_dir / "state.db").read_text() == "sessions-data"
|
||||||
|
assert (profile_dir / "sessions").exists()
|
||||||
|
assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
|
||||||
|
|
||||||
def test_clone_config_missing_files_skipped(self, profile_env):
|
def test_clone_config_missing_files_skipped(self, profile_env):
|
||||||
"""Clone config gracefully skips files that don't exist in source."""
|
"""Clone config gracefully skips files that don't exist in source."""
|
||||||
profile_dir = create_profile("coder", clone_config=True, no_alias=True)
|
profile_dir = create_profile("coder", clone_config=True, no_alias=True)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue