diff --git a/hermes_constants.py b/hermes_constants.py index 4a0cccb9d56..274bed4b003 100644 --- a/hermes_constants.py +++ b/hermes_constants.py @@ -6,6 +6,7 @@ without risk of circular imports. import os import shutil +import stat import sys import sysconfig from contextvars import ContextVar, Token @@ -229,16 +230,25 @@ def get_hermes_dir(new_subpath: str, old_name: str) -> Path: Existing installs that already have the old path (e.g. ``image_cache``) keep using it — no migration required. + A bare empty ``/`` directory does **not** count as "the + legacy install is in use" — install scaffolds, manual ``mkdir`` work, + and cleared-then-abandoned locations all create empty stubs that + would otherwise silently shadow real data populated at + ``/``. See #27602 for the pairing-store regression where + a dormant empty ``pairing/`` orphaned approved-user data in + ``platforms/pairing/``. + Args: new_subpath: Preferred path relative to HERMES_HOME (e.g. ``"cache/images"``). old_name: Legacy path relative to HERMES_HOME (e.g. ``"image_cache"``). Returns: - Absolute ``Path`` — old location if it exists on disk, otherwise the new one. + Absolute ``Path`` — legacy location if it exists with content, + otherwise the new location. """ home = get_hermes_home() old_path = home / old_name - if old_path.exists(): + if _legacy_path_has_content(old_path): return old_path return home / new_subpath @@ -563,6 +573,55 @@ def agent_browser_runnable(path: str | None) -> bool: return result.returncode == 0 +def _legacy_path_has_content(path: Path) -> bool: + """Return ``True`` iff ``path`` exists and has content worth honouring. + + A populated *directory* (any entry inside) counts. A non-directory + file at ``path`` also counts — the consumer presumably wrote it. + An empty directory does **not** count, so a stale empty + legacy stub falls through to the new layout. If the path cannot be + inspected (``PermissionError`` on ``stat``/``iterdir``, or any other + ``OSError`` short of "not found"), assume occupied so we don't + accidentally orphan legacy data. Only a genuine + ``FileNotFoundError`` counts as absent. + + Symlinks are resolved before judging content: a symlink pointing at a + populated directory (or any existing non-directory target) counts, but + a **dangling** symlink (broken target) does **not** — it must not be + allowed to shadow populated new-layout data, matching the old + ``exists()`` gate's behaviour for broken links. + """ + try: + st = path.lstat() + except FileNotFoundError: + return False + except OSError: + # PermissionError on a parent, or any other inspection failure: + # treat as occupied rather than silently orphaning legacy data. + return True + if stat.S_ISLNK(st.st_mode): + # Resolve the link's target. A dangling symlink has no content and + # must not shadow the new layout; a valid one is judged on its target. + try: + target_st = path.stat() # follows the link + except FileNotFoundError: + return False # dangling symlink → fall through to new layout + except OSError: + return True # can't resolve → assume occupied, don't orphan data + if not stat.S_ISDIR(target_st.st_mode): + return True + # target is a directory — fall through to the iterdir() emptiness check + elif not stat.S_ISDIR(st.st_mode): + return True + try: + next(path.iterdir()) + except StopIteration: + return False + except OSError: + return True + return True + + def display_hermes_home() -> str: """Return a user-friendly display string for the current HERMES_HOME. diff --git a/tests/test_hermes_constants.py b/tests/test_hermes_constants.py index 3c1f4870cf7..46ff5a3ce6b 100644 --- a/tests/test_hermes_constants.py +++ b/tests/test_hermes_constants.py @@ -13,6 +13,7 @@ from hermes_constants import ( find_node_executable, find_node_executable_on_path, get_default_hermes_root, + get_hermes_dir, get_hermes_home, heal_hermes_managed_node, hermes_managed_node_tree_present, @@ -609,3 +610,177 @@ class TestAgentBrowserRunnable: # the package at run time, so the validator trusts it without stat. assert agent_browser_runnable("npx agent-browser") is True assert agent_browser_runnable("/usr/local/bin/npx agent-browser") is True + + +class TestGetHermesDir: + """Tests for ``get_hermes_dir(new_subpath, old_name)``. + + Contract: prefer the legacy ``/`` location, but only when + it has content. An empty legacy stub must fall through to the new + layout so dormant install scaffolds don't orphan populated data at + ``/``. Regression guard for #27602. + """ + + def _set_home(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + def test_neither_exists_returns_new(self, tmp_path, monkeypatch): + self._set_home(tmp_path, monkeypatch) + result = get_hermes_dir("platforms/pairing", "pairing") + assert result == tmp_path / "platforms/pairing" + + def test_legacy_populated_returns_legacy(self, tmp_path, monkeypatch): + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "image_cache" + legacy.mkdir() + (legacy / "cached.png").write_bytes(b"x") + result = get_hermes_dir("cache/images", "image_cache") + assert result == legacy + + def test_legacy_populated_with_subdir_returns_legacy(self, tmp_path, monkeypatch): + """Sub-directories count as content (e.g. nested cache layout).""" + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "matrix" / "store" + legacy.mkdir(parents=True) + (legacy / "session").mkdir() # subdir, not a file + result = get_hermes_dir("platforms/matrix/store", "matrix/store") + assert result == legacy + + def test_legacy_empty_returns_new(self, tmp_path, monkeypatch): + """The #27602 regression: empty legacy dir orphans populated new dir. + + Without the fix, the resolver returned the empty legacy path + unconditionally, causing the pairing store to forget every + previously-approved user when an empty ``pairing/`` stub had + been pre-created at install time. + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "pairing" + legacy.mkdir() + # Populated new layout — this is the data that must not be orphaned. + new = tmp_path / "platforms" / "pairing" + new.mkdir(parents=True) + (new / "telegram-approved.json").write_text("[]") + result = get_hermes_dir("platforms/pairing", "pairing") + assert result == new + + def test_legacy_empty_and_new_missing_returns_new(self, tmp_path, monkeypatch): + """Empty legacy + no new yet — return the new path (will be created lazily). + + Slight behaviour change vs the old resolver (which would return the + empty legacy dir): the new path is what every consumer mkdirs into + when it doesn't exist, so the next write lands in the canonical + location instead of perpetuating the empty stub. + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "audio_cache" + legacy.mkdir() + result = get_hermes_dir("cache/audio", "audio_cache") + assert result == tmp_path / "cache/audio" + + def test_legacy_is_file_treated_as_content(self, tmp_path, monkeypatch): + """A non-directory file at the legacy path counts as occupied. + + Defensive against odd installs where the caller previously wrote a + single file instead of a directory. We honour whatever's there. + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "image_cache" + legacy.write_bytes(b"sentinel") + result = get_hermes_dir("cache/images", "image_cache") + assert result == legacy + + def test_unreadable_legacy_dir_kept(self, tmp_path, monkeypatch): + """If we can't enumerate the legacy dir, assume occupied — never + accidentally orphan legacy data on a transient permission error. + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "whatsapp" / "session" + legacy.mkdir(parents=True) + # Populate the new path too. The point is to verify that an + # OSError on iterdir does NOT fall through to the new layout. + new = tmp_path / "platforms" / "whatsapp" / "session" + new.mkdir(parents=True) + (new / "creds.json").write_text("{}") + + real_iterdir = Path.iterdir + + def boom(self): + if self == legacy: + raise PermissionError("simulated") + return real_iterdir(self) + + monkeypatch.setattr(Path, "iterdir", boom) + result = get_hermes_dir( + "platforms/whatsapp/session", "whatsapp/session" + ) + assert result == legacy + + def test_unstatable_legacy_dir_kept(self, tmp_path, monkeypatch): + """A ``PermissionError`` raised by the existence check itself (e.g. + an unreadable parent) must NOT be read as "absent". + + The old ``Path.exists()``/``Path.is_dir()`` gate swallowed + ``PermissionError`` and returned ``False``, so an unreadable legacy + dir fell through to the new layout and orphaned legacy data — + contradicting the docstring's "assume occupied on errors" intent. + With the ``lstat()``-based gate this raises and is caught as + occupied. Regression guard for the #27602 follow-up. + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "pairing" + legacy.mkdir() + # Populate the new path; it must NOT be selected. + new = tmp_path / "platforms" / "pairing" + new.mkdir(parents=True) + (new / "telegram-approved.json").write_text("[]") + + real_lstat = Path.lstat + + def boom(self): + if self == legacy: + raise PermissionError("simulated unreadable parent") + return real_lstat(self) + + monkeypatch.setattr(Path, "lstat", boom) + result = get_hermes_dir("platforms/pairing", "pairing") + assert result == legacy + + def test_dangling_legacy_symlink_returns_new(self, tmp_path, monkeypatch): + """A dangling legacy symlink must NOT shadow populated new-layout data. + + ``lstat()`` reports the link itself (not its missing target), so the + helper must resolve the link and treat a broken target as absent — + matching the old ``exists()`` gate, which followed the link and + returned False for a dangling one. Otherwise a stale broken symlink + would orphan real data (a stricter variant of the #27602 bug). + """ + self._set_home(tmp_path, monkeypatch) + legacy = tmp_path / "pairing" + legacy.symlink_to(tmp_path / "does-not-exist") + new = tmp_path / "platforms" / "pairing" + new.mkdir(parents=True) + (new / "discord-approved.json").write_text("[]") + result = get_hermes_dir("platforms/pairing", "pairing") + assert result == new + + def test_symlink_to_populated_dir_returns_legacy(self, tmp_path, monkeypatch): + """A legacy symlink pointing at a populated directory is honoured.""" + self._set_home(tmp_path, monkeypatch) + real = tmp_path / "real_store" + real.mkdir() + (real / "cached.png").write_bytes(b"x") + legacy = tmp_path / "image_cache" + legacy.symlink_to(real) + result = get_hermes_dir("cache/images", "image_cache") + assert result == legacy + + def test_symlink_to_empty_dir_returns_new(self, tmp_path, monkeypatch): + """A legacy symlink pointing at an EMPTY directory falls through.""" + self._set_home(tmp_path, monkeypatch) + empty = tmp_path / "empty_real" + empty.mkdir() + legacy = tmp_path / "audio_cache" + legacy.symlink_to(empty) + result = get_hermes_dir("cache/audio", "audio_cache") + assert result == tmp_path / "cache/audio" diff --git a/tests/tools/test_credential_files.py b/tests/tools/test_credential_files.py index ce44959585a..fcc4abcb1f1 100644 --- a/tests/tools/test_credential_files.py +++ b/tests/tools/test_credential_files.py @@ -400,12 +400,22 @@ class TestCacheDirectoryMounts: assert mounts[0]["container_path"] == "/root/.hermes/cache/documents" def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch): - """Old-style dir names (e.g. document_cache) are resolved correctly.""" + """Old-style dir names (e.g. document_cache) are resolved correctly. + + Populates the legacy dirs with a sentinel file so they count as + ``has content`` for ``get_hermes_dir``'s populated-legacy check + (see #27602 — empty legacy stubs are no longer honoured). + """ hermes_home = tmp_path / ".hermes" hermes_home.mkdir() - # Use legacy dir name — get_hermes_dir prefers old if it exists - (hermes_home / "document_cache").mkdir() - (hermes_home / "image_cache").mkdir() + # Use legacy dir name with content — get_hermes_dir prefers + # populated old over new. + legacy_doc = hermes_home / "document_cache" + legacy_img = hermes_home / "image_cache" + legacy_doc.mkdir() + legacy_img.mkdir() + (legacy_doc / "cached.txt").write_bytes(b"x") + (legacy_img / "cached.png").write_bytes(b"x") monkeypatch.setenv("HERMES_HOME", str(hermes_home)) mounts = get_cache_directory_mounts()