Exclude infrastructure artifacts from `hermes profile create --clone-all`.

Review notes: the line structure below was restored from a
whitespace-collapsed copy of this patch, so spacing inside context/removed
lines may differ from the target files (apply with
`git apply --ignore-whitespace` if a hunk is rejected).  The added `_ignore`
body was revised during review to resolve the visited directory once per
call instead of once per entry, so the post-image blob id on the first
`index` line is stale.

diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py
index 1114821d99d..d111159c013 100644
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@@ -64,13 +64,39 @@ _CLONE_SUBDIR_FILES = [
     "memories/USER.md",
 ]
 
-# Runtime files stripped after --clone-all (shouldn't carry over)
-_CLONE_ALL_STRIP = [
+# Runtime files stripped after --clone-all (shouldn't carry over).
+# Kept as a post-copy step rather than in the ignore filter because they
+# are created dynamically during normal use and may be absent at copy time.
+_CLONE_ALL_STRIP: list[str] = [
     "gateway.pid",
     "gateway_state.json",
     "processes.json",
 ]
 
+# Infrastructure artifacts excluded from --clone-all when the source is the
+# default profile (``~/.hermes``). Named profiles never contain these
+# directories at root, so the exclusion is gated to avoid silently dropping
+# user data from a named-profile source.
+#
+# Rationale per item:
+#   hermes-agent — git repo checkout (~84 MB source + ~3 GB venv)
+#   .worktrees — git worktrees
+#   profiles — sibling named profiles (recursive copy never intended)
+#   bin — installed binaries (tirith etc., ~10 MB) shared per-host
+#   node_modules — npm packages (hundreds of MB)
+#
+# See ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` below for the broader export-side
+# exclusion list (export drops state.db / logs / caches too because the
+# archive is a portable snapshot; clone-all keeps those because the cloned
+# profile is meant to keep working immediately).
+_CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
+    "hermes-agent",
+    ".worktrees",
+    "profiles",
+    "bin",
+    "node_modules",
+})
+
 # Marker file written by `hermes profile create --no-skills`. When present in
 # a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
 # update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
@@ -89,23 +115,48 @@ def has_bundled_skills_opt_out(profile_dir: Path) -> bool:
 
 
 def _clone_all_copytree_ignore(source_dir: Path):
-    """Ignore ``profiles/`` at the root of *source_dir* only.
+    """Exclude infrastructure artifacts when cloning a profile via --clone-all.
 
-    ``~/.hermes`` contains ``profiles//`` for sibling named profiles.
-    ``shutil.copytree`` would otherwise duplicate that entire tree inside the
-    new profile (recursive ``.../profiles/.../profiles/...``). Export already
-    excludes ``profiles`` via ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` — match that
-    behavior for ``--clone-all``.
+    Two categories:
+      1. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
+         Hermes infrastructure directories that only the default profile
+         (``~/.hermes``) ever contains. Gated on ``source_dir`` actually
+         being the default profile so a named-profile source never has its
+         own data silently dropped.
+      2. Universal exclusions at any depth — Python bytecode caches that
+         are stale or regenerable (``__pycache__``, ``*.pyc``, ``*.pyo``)
+         and runtime sockets / temp files (``*.sock``, ``*.tmp``).
+
+    The export-side ignore (``_default_export_ignore``) uses the same
+    two-tier pattern with the broader ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` set
+    because the export archive is a portable snapshot rather than a live
+    clone.
     """
     source_resolved = source_dir.resolve()
+    is_default_source = source_resolved == _get_default_hermes_home().resolve()
 
     def _ignore(directory: str, names: List[str]) -> List[str]:
-        try:
-            if Path(directory).resolve() == source_resolved:
-                return [n for n in names if n == "profiles"]
-        except (OSError, ValueError):
-            pass
-        return []
+        # Whether *directory* is the source root depends only on the directory,
+        # so resolve it once per call instead of once per entry.  Root-level
+        # exclusions only apply when cloning the default profile.
+        at_default_root = False
+        if is_default_source:
+            try:
+                at_default_root = Path(directory).resolve() == source_resolved
+            except (OSError, ValueError):
+                # ``resolve()`` can fail on unusual FS layouts (broken
+                # symlinks, missing parents). Fail open — better to
+                # over-copy than silently drop user data.
+                pass
+        return [
+            entry
+            for entry in names
+            # Universal exclusions at any depth.
+            if entry == "__pycache__"
+            or entry.endswith((".pyc", ".pyo", ".sock", ".tmp"))
+            # Infrastructure dirs, only at the default profile's root.
+            or (at_default_root and entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT)
+        ]
 
     return _ignore
 
diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py
index 88bc09b694c..f4c8a4d1ff6 100644
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@@ -244,6 +244,64 @@ class TestCreateProfile:
         assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
         assert not (profile_dir / "profiles").exists()
 
+    def test_clone_all_excludes_default_infrastructure(self, profile_env):
+        """--clone-all from default profile excludes hermes-agent, .worktrees,
+        profiles, bin, node_modules at root, plus __pycache__/*.pyc/*.pyo/
+        *.sock/*.tmp at any depth. Profile data (config, env, skills,
+        sessions, logs, state.db) must be preserved — clone-all means
+        "complete snapshot minus infrastructure."
+        """
+        tmp_path = profile_env
+        default_home = tmp_path / ".hermes"
+        # Simulate infrastructure dirs that only the default profile has
+        (default_home / "hermes-agent" / ".git").mkdir(parents=True)
+        (default_home / "hermes-agent" / "venv" / "bin").mkdir(parents=True)
+        (default_home / "hermes-agent" / "README.md").write_text("repo")
+        (default_home / ".worktrees" / "some-tree").mkdir(parents=True)
+        (default_home / "profiles" / "other").mkdir(parents=True)
+        (default_home / "profiles" / "other" / "config.yaml").write_text("x")
+        (default_home / "bin").mkdir(exist_ok=True)
+        (default_home / "bin" / "tool").write_text("binary")
+        (default_home / "node_modules" / ".package-lock.json").mkdir(parents=True)
+        # Bytecode + temp files at nested depth (universal exclusion)
+        (default_home / "skills" / "my-skill" / "__pycache__").mkdir(parents=True)
+        (default_home / "skills" / "my-skill" / "__pycache__" / "module.cpython-311.pyc").write_text("stale")
+        (default_home / "skills" / "my-skill" / "module.pyc").write_text("stale")
+        (default_home / "skills" / "my-skill" / "module.pyo").write_text("stale")
+        (default_home / "data.sock").write_text("socket")
+        (default_home / "data.tmp").write_text("tmp")
+        # Profile data that SHOULD be copied
+        (default_home / "skills" / "my-skill").mkdir(parents=True, exist_ok=True)
+        (default_home / "skills" / "my-skill" / "SKILL.md").write_text("skill")
+        (default_home / "config.yaml").write_text("model: gpt-4")
+        (default_home / ".env").write_text("KEY=val")
+        (default_home / "state.db").write_text("sessions-data")
+        (default_home / "sessions").mkdir(exist_ok=True)
+        (default_home / "logs").mkdir(exist_ok=True)
+        (default_home / "logs" / "gateway.log").write_text("log")
+
+        profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
+
+        # Infrastructure must be excluded
+        assert not (profile_dir / "hermes-agent").exists()
+        assert not (profile_dir / ".worktrees").exists()
+        assert not (profile_dir / "profiles").exists()
+        assert not (profile_dir / "bin").exists()
+        assert not (profile_dir / "node_modules").exists()
+        # Universal exclusions at any depth
+        assert not (profile_dir / "data.sock").exists()
+        assert not (profile_dir / "data.tmp").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "__pycache__").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "module.pyc").exists()
+        assert not (profile_dir / "skills" / "my-skill" / "module.pyo").exists()
+        # All profile data must be present
+        assert (profile_dir / "skills" / "my-skill" / "SKILL.md").read_text() == "skill"
+        assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
+        assert (profile_dir / ".env").read_text() == "KEY=val"
+        assert (profile_dir / "state.db").read_text() == "sessions-data"
+        assert (profile_dir / "sessions").exists()
+        assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
+
     def test_clone_config_missing_files_skipped(self, profile_env):
         """Clone config gracefully skips files that don't exist in source."""
         profile_dir = create_profile("coder", clone_config=True, no_alias=True)