fix(profiles): exclude session history, backups, and snapshots from --clone-all (#45246)

--clone-all copied the source profile's state.db, sessions/, backups/,
state-snapshots/, and checkpoints/ into the new profile. These are
per-profile history: a 49GB copy in practice (15GB snapshots + 11GB
backup archives + 16GB state.db + 6.4GB sessions), and restoring a
copied backup inside the clone would resurrect the SOURCE profile's
state. A clone is a fresh workspace; history stays with the source.

New _CLONE_ALL_HISTORY_EXCLUDE_ROOT set, applied at root level for ANY
source profile (named profiles accumulate the same artifacts), unlike
the default-gated infrastructure excludes. Nested same-name dirs still
copy. Docs and the post-create CLI message updated to match; profile
export / hermes backup remain the full-history paths.
This commit is contained in:
Teknium 2026-06-12 15:41:50 -07:00 committed by GitHub
parent 0db5cb8e75
commit 7a318aae22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 89 additions and 27 deletions

View file

@ -9849,7 +9849,10 @@ def cmd_profile(args):
getattr(args, "clone_from", None) or get_active_profile_name()
)
if clone_all:
print(f"Full copy from {source_label}.")
print(
f"Full copy from {source_label} "
"(excluding session history, backups, and snapshots)."
)
else:
print(
f"Cloned config, .env, SOUL.md, and skills from {source_label}."

View file

@ -88,9 +88,9 @@ _CLONE_ALL_STRIP: list[str] = [
# node_modules — npm packages (hundreds of MB)
#
# See ``_DEFAULT_EXPORT_EXCLUDE_ROOT`` below for the broader export-side
# exclusion list (export drops state.db / logs / caches too because the
# archive is a portable snapshot; clone-all keeps those because the cloned
# profile is meant to keep working immediately).
# exclusion list (export also drops logs / caches because the archive is a
# portable snapshot; clone-all keeps those because the cloned profile is
# meant to keep working immediately).
_CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
"hermes-agent",
".worktrees",
@ -99,6 +99,30 @@ _CLONE_ALL_DEFAULT_EXCLUDE_ROOT: frozenset[str] = frozenset({
"node_modules",
})
# Root-level names that --clone-all never copies, whichever profile is the
# source. They hold the source profile's own history: a clone starts as a
# fresh workspace, restoring an inherited backup or snapshot inside the clone
# would bring back the SOURCE profile's state, and together these entries can
# add tens of GB to the copy. ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` is gated on
# the default profile; this set is deliberately not, because named profiles
# accumulate the same artifacts.
_CLONE_ALL_HISTORY_EXCLUDE_ROOT: frozenset[str] = frozenset(
    (
        # SQLite session store and its -wal / -shm companion files
        # (the store can reach many GB).
        "state.db",
        "state.db-wal",
        "state.db-shm",
        # Per-session transcript/data directories.
        "sessions",
        # `hermes backup` archives.
        "backups",
        # Quick-backup snapshot trees.
        "state-snapshots",
        # Session checkpoint data.
        "checkpoints",
    )
)
# Marker file written by `hermes profile create --no-skills`. When present in
# a profile's root, callers of seed_profile_skills() (fresh-create, `hermes
# update`'s all-profile sync, the web dashboard) skip bundled-skill seeding
@ -119,13 +143,16 @@ def has_bundled_skills_opt_out(profile_dir: Path) -> bool:
def _clone_all_copytree_ignore(source_dir: Path):
"""Exclude infrastructure artifacts when cloning a profile via --clone-all.
Two categories:
1. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
Three categories:
1. Root-level entries in ``_CLONE_ALL_HISTORY_EXCLUDE_ROOT`` — session
history, backups, and snapshots that belong to the SOURCE profile
and should never carry into a fresh clone. Applies to any source.
2. Root-level entries in ``_CLONE_ALL_DEFAULT_EXCLUDE_ROOT`` — known
Hermes infrastructure directories that only the default profile
(``~/.hermes``) ever contains. Gated on ``source_dir`` actually
being the default profile so a named-profile source never has its
own data silently dropped.
2. Universal exclusions at any depth — Python bytecode caches that
3. Universal exclusions at any depth — Python bytecode caches that
are stale or regenerable (``__pycache__``, ``*.pyc``, ``*.pyo``)
and runtime sockets / temp files (``*.sock``, ``*.tmp``).
@ -147,17 +174,21 @@ def _clone_all_copytree_ignore(source_dir: Path):
):
ignored.append(entry)
continue
# Root-level exclusions only apply when cloning the default profile.
if is_default_source:
try:
if Path(directory).resolve() == source_resolved:
if entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT:
ignored.append(entry)
except (OSError, ValueError):
# ``resolve()`` can fail on unusual FS layouts (broken
# symlinks, missing parents). Fail open — better to
# over-copy than silently drop user data.
pass
try:
at_root = Path(directory).resolve() == source_resolved
except (OSError, ValueError):
# ``resolve()`` can fail on unusual FS layouts (broken
# symlinks, missing parents). Fail open — better to
# over-copy than silently drop user data.
at_root = False
if at_root:
# History artifacts: excluded for ANY source profile.
if entry in _CLONE_ALL_HISTORY_EXCLUDE_ROOT:
ignored.append(entry)
continue
# Infrastructure: only the default profile contains these.
if is_default_source and entry in _CLONE_ALL_DEFAULT_EXCLUDE_ROOT:
ignored.append(entry)
return ignored
return _ignore

View file

@ -268,9 +268,9 @@ class TestCreateProfile:
def test_clone_all_excludes_default_infrastructure(self, profile_env):
"""--clone-all from default profile excludes hermes-agent, .worktrees,
bin, node_modules at root, plus __pycache__/*.pyc/*.pyo/*.sock/*.tmp
at any depth. Profile data (config, env, skills, sessions, logs,
state.db) must be preserved — clone-all means "complete snapshot
minus infrastructure."
at any depth. Profile data (config, env, skills, logs) must be
preserved — clone-all means "complete snapshot minus infrastructure
and per-profile history."
"""
tmp_path = profile_env
default_home = tmp_path / ".hermes"
@ -296,8 +296,6 @@ class TestCreateProfile:
(default_home / "skills" / "my-skill" / "SKILL.md").write_text("skill")
(default_home / "config.yaml").write_text("model: gpt-4")
(default_home / ".env").write_text("KEY=val")
(default_home / "state.db").write_text("sessions-data")
(default_home / "sessions").mkdir(exist_ok=True)
(default_home / "logs").mkdir(exist_ok=True)
(default_home / "logs" / "gateway.log").write_text("log")
@ -319,10 +317,40 @@ class TestCreateProfile:
assert (profile_dir / "skills" / "my-skill" / "SKILL.md").read_text() == "skill"
assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
assert (profile_dir / ".env").read_text() == "KEY=val"
assert (profile_dir / "state.db").read_text() == "sessions-data"
assert (profile_dir / "sessions").exists()
assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
def test_clone_all_excludes_history_artifacts(self, profile_env):
    """History artifacts at the source root are left out of a --clone-all copy.

    The source's session store, sessions, backups, snapshots, and
    checkpoints must be absent from the clone, while ordinary profile data
    and nested directories that merely share a history name are still
    copied.
    """
    source_home = profile_env / ".hermes"

    # SQLite session store plus its companion files.
    for db_name, payload in (
        ("state.db", "sessions-data"),
        ("state.db-wal", "wal"),
        ("state.db-shm", "shm"),
    ):
        (source_home / db_name).write_text(payload)

    # History directories, each populated so an accidental copy is visible.
    (source_home / "sessions" / "20260101_old").mkdir(parents=True)
    backups_dir = source_home / "backups"
    backups_dir.mkdir(exist_ok=True)
    (backups_dir / "backup.tar.gz").write_text("archive")
    (source_home / "state-snapshots" / "snap1").mkdir(parents=True)
    (source_home / "checkpoints" / "cp1").mkdir(parents=True)

    # Ordinary profile data: expected to reach the clone.
    (source_home / "config.yaml").write_text("model: gpt-4")

    # The exclusion is root-only, so a nested directory that reuses a
    # history name must be copied.
    nested_backups = source_home / "workspace" / "backups"
    nested_backups.mkdir(parents=True)
    (nested_backups / "user-data.txt").write_text("mine")

    clone_dir = create_profile("fresh", clone_all=True, no_alias=True)

    history_names = (
        "state.db",
        "state.db-wal",
        "state.db-shm",
        "sessions",
        "backups",
        "state-snapshots",
        "checkpoints",
    )
    for name in history_names:
        assert not (clone_dir / name).exists(), name

    copied_config = clone_dir / "config.yaml"
    assert copied_config.read_text() == "model: gpt-4"

    # Root-only: the nested same-name directory and its file survive.
    copied_nested = clone_dir / "workspace" / "backups" / "user-data.txt"
    assert copied_nested.read_text() == "mine"
def test_clone_config_missing_files_skipped(self, profile_env):
"""Clone config gracefully skips files that don't exist in source."""
profile_dir = create_profile("coder", clone_config=True, no_alias=True)

View file

@ -81,7 +81,7 @@ Creates a new profile.
|-------------------|-------------|
| `<name>` | Name for the new profile. Must be a valid directory name (alphanumeric, hyphens, underscores). |
| `--clone` | Copy `config.yaml`, `.env`, and `SOUL.md` from the current profile. |
| `--clone-all` | Copy everything (config, memories, skills, sessions, state) from the current profile. |
| `--clone-all` | Copy everything (config, memories, skills, cron, plugins) from the current profile. Excludes per-profile history: sessions, `state.db`, backups, state-snapshots, checkpoints. |
| `--clone-from <profile>` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. |
| `--no-alias` | Skip wrapper script creation. |
| `--description "<text>"` | One- or two-sentence description of what this profile is good at. Used by the kanban orchestrator to route tasks based on role instead of profile name alone. Skip and add later via `hermes profile describe`. Persisted in `<profile_dir>/profile.yaml`. |

View file

@ -58,7 +58,7 @@ Copies your current profile's `config.yaml`, `.env`, and `SOUL.md` into the new
hermes profile create backup --clone-all
```
Copies **everything** — config, API keys, personality, all memories, full session history, skills, cron jobs, plugins. A complete snapshot. Useful for backups or forking an agent that already has context.
Copies **everything** — config, API keys, personality, all memories, skills, cron jobs, plugins. A complete working snapshot. Per-profile history is excluded (session history, `state.db`, `backups/`, `state-snapshots/`, `checkpoints/`) — these belong to the source profile and can reach tens of GB. For a full backup including history, use `hermes profile export` or `hermes backup` instead.
### Clone from a specific profile