fix(hermes-home): only honour legacy dir layout when it has content

get_hermes_dir(new_subpath, old_name) returned the legacy <old_name>/
location as soon as it existed on disk — even when empty. When an empty
legacy stub is created on a profile that already has populated data at
the new consolidated <new_subpath>/ (install scaffolds, profile init, a
stray mkdir, or ensure_hermes_home() recreating legacy dirs), the
resolver silently flipped to the empty legacy dir and the real data
became invisible. No log, no error — the feature behaved as if state was
wiped. Reproduced as a Discord pairing store losing every approved user
when an empty pairing/ shadowed the populated platforms/pairing/.

Resolve the legacy path only when it has content: a populated directory
(any entry) or a non-directory file counts; an empty directory falls
through to the new layout. Inspection failures (PermissionError on
lstat/iterdir, or any OSError short of FileNotFoundError) are treated as
"occupied" so a transient error never orphans legacy data — only a
genuine FileNotFoundError counts as absent. The lstat()-based gate also
fixes the prior exists()/is_dir() path swallowing PermissionError and
mis-reading an unreadable legacy dir as absent.

This hardens all 11+ call sites that share the resolver (pairing,
image/audio/video/document caches, matrix/whatsapp session stores,
vision/credential/tts/browser dirs).

Adds TestGetHermesDir regression coverage (empty/populated/subdir/file/
unreadable/unstatable cases) and updates test_credential_files to
populate its legacy dirs so they still count as content.

Closes #27602
Closes #27715
This commit is contained in:
briandevans 2026-06-27 04:43:32 +05:30 committed by kshitij
parent c377e954fb
commit 2d8c44ac87
3 changed files with 250 additions and 6 deletions

View file

@ -6,6 +6,7 @@ without risk of circular imports.
import os
import shutil
import stat
import sys
import sysconfig
from contextvars import ContextVar, Token
@ -229,16 +230,25 @@ def get_hermes_dir(new_subpath: str, old_name: str) -> Path:
Existing installs that already have the old path (e.g. ``image_cache``)
keep using it no migration required.
A bare empty ``<old_name>/`` directory does **not** count as "the
legacy install is in use" — install scaffolds, manual ``mkdir`` work,
and cleared-then-abandoned locations all create empty stubs that
would otherwise silently shadow real data populated at
``<new_subpath>/``. See #27602 for the pairing-store regression where
a dormant empty ``pairing/`` orphaned approved-user data in
``platforms/pairing/``.
Args:
new_subpath: Preferred path relative to HERMES_HOME (e.g. ``"cache/images"``).
old_name: Legacy path relative to HERMES_HOME (e.g. ``"image_cache"``).
Returns:
Absolute ``Path`` old location if it exists on disk, otherwise the new one.
Absolute ``Path`` legacy location if it exists with content,
otherwise the new location.
"""
home = get_hermes_home()
old_path = home / old_name
if old_path.exists():
if _legacy_path_has_content(old_path):
return old_path
return home / new_subpath
@ -563,6 +573,55 @@ def agent_browser_runnable(path: str | None) -> bool:
return result.returncode == 0
def _legacy_path_has_content(path: Path) -> bool:
"""Return ``True`` iff ``path`` exists and has content worth honouring.
A populated *directory* (any entry inside) counts. A non-directory
file at ``path`` also counts the consumer presumably wrote it.
An empty directory does **not** count, so a stale empty
legacy stub falls through to the new layout. If the path cannot be
inspected (``PermissionError`` on ``stat``/``iterdir``, or any other
``OSError`` short of "not found"), assume occupied so we don't
accidentally orphan legacy data. Only a genuine
``FileNotFoundError`` counts as absent.
Symlinks are resolved before judging content: a symlink pointing at a
populated directory (or any existing non-directory target) counts, but
a **dangling** symlink (broken target) does **not** it must not be
allowed to shadow populated new-layout data, matching the old
``exists()`` gate's behaviour for broken links.
"""
try:
st = path.lstat()
except FileNotFoundError:
return False
except OSError:
# PermissionError on a parent, or any other inspection failure:
# treat as occupied rather than silently orphaning legacy data.
return True
if stat.S_ISLNK(st.st_mode):
# Resolve the link's target. A dangling symlink has no content and
# must not shadow the new layout; a valid one is judged on its target.
try:
target_st = path.stat() # follows the link
except FileNotFoundError:
return False # dangling symlink → fall through to new layout
except OSError:
return True # can't resolve → assume occupied, don't orphan data
if not stat.S_ISDIR(target_st.st_mode):
return True
# target is a directory — fall through to the iterdir() emptiness check
elif not stat.S_ISDIR(st.st_mode):
return True
try:
next(path.iterdir())
except StopIteration:
return False
except OSError:
return True
return True
def display_hermes_home() -> str:
"""Return a user-friendly display string for the current HERMES_HOME.

View file

@ -13,6 +13,7 @@ from hermes_constants import (
find_node_executable,
find_node_executable_on_path,
get_default_hermes_root,
get_hermes_dir,
get_hermes_home,
heal_hermes_managed_node,
hermes_managed_node_tree_present,
@ -609,3 +610,177 @@ class TestAgentBrowserRunnable:
# the package at run time, so the validator trusts it without stat.
assert agent_browser_runnable("npx agent-browser") is True
assert agent_browser_runnable("/usr/local/bin/npx agent-browser") is True
class TestGetHermesDir:
"""Tests for ``get_hermes_dir(new_subpath, old_name)``.
Contract: prefer the legacy ``<old_name>/`` location, but only when
it has content. An empty legacy stub must fall through to the new
layout so dormant install scaffolds don't orphan populated data at
``<new_subpath>/``. Regression guard for #27602.
"""
def _set_home(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
def test_neither_exists_returns_new(self, tmp_path, monkeypatch):
self._set_home(tmp_path, monkeypatch)
result = get_hermes_dir("platforms/pairing", "pairing")
assert result == tmp_path / "platforms/pairing"
def test_legacy_populated_returns_legacy(self, tmp_path, monkeypatch):
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "image_cache"
legacy.mkdir()
(legacy / "cached.png").write_bytes(b"x")
result = get_hermes_dir("cache/images", "image_cache")
assert result == legacy
def test_legacy_populated_with_subdir_returns_legacy(self, tmp_path, monkeypatch):
"""Sub-directories count as content (e.g. nested cache layout)."""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "matrix" / "store"
legacy.mkdir(parents=True)
(legacy / "session").mkdir() # subdir, not a file
result = get_hermes_dir("platforms/matrix/store", "matrix/store")
assert result == legacy
def test_legacy_empty_returns_new(self, tmp_path, monkeypatch):
"""The #27602 regression: empty legacy dir orphans populated new dir.
Without the fix, the resolver returned the empty legacy path
unconditionally, causing the pairing store to forget every
previously-approved user when an empty ``pairing/`` stub had
been pre-created at install time.
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "pairing"
legacy.mkdir()
# Populated new layout — this is the data that must not be orphaned.
new = tmp_path / "platforms" / "pairing"
new.mkdir(parents=True)
(new / "telegram-approved.json").write_text("[]")
result = get_hermes_dir("platforms/pairing", "pairing")
assert result == new
def test_legacy_empty_and_new_missing_returns_new(self, tmp_path, monkeypatch):
"""Empty legacy + no new yet — return the new path (will be created lazily).
Slight behaviour change vs the old resolver (which would return the
empty legacy dir): the new path is what every consumer mkdirs into
when it doesn't exist, so the next write lands in the canonical
location instead of perpetuating the empty stub.
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "audio_cache"
legacy.mkdir()
result = get_hermes_dir("cache/audio", "audio_cache")
assert result == tmp_path / "cache/audio"
def test_legacy_is_file_treated_as_content(self, tmp_path, monkeypatch):
"""A non-directory file at the legacy path counts as occupied.
Defensive against odd installs where the caller previously wrote a
single file instead of a directory. We honour whatever's there.
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "image_cache"
legacy.write_bytes(b"sentinel")
result = get_hermes_dir("cache/images", "image_cache")
assert result == legacy
def test_unreadable_legacy_dir_kept(self, tmp_path, monkeypatch):
"""If we can't enumerate the legacy dir, assume occupied — never
accidentally orphan legacy data on a transient permission error.
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "whatsapp" / "session"
legacy.mkdir(parents=True)
# Populate the new path too. The point is to verify that an
# OSError on iterdir does NOT fall through to the new layout.
new = tmp_path / "platforms" / "whatsapp" / "session"
new.mkdir(parents=True)
(new / "creds.json").write_text("{}")
real_iterdir = Path.iterdir
def boom(self):
if self == legacy:
raise PermissionError("simulated")
return real_iterdir(self)
monkeypatch.setattr(Path, "iterdir", boom)
result = get_hermes_dir(
"platforms/whatsapp/session", "whatsapp/session"
)
assert result == legacy
def test_unstatable_legacy_dir_kept(self, tmp_path, monkeypatch):
"""A ``PermissionError`` raised by the existence check itself (e.g.
an unreadable parent) must NOT be read as "absent".
The old ``Path.exists()``/``Path.is_dir()`` gate swallowed
``PermissionError`` and returned ``False``, so an unreadable legacy
dir fell through to the new layout and orphaned legacy data
contradicting the docstring's "assume occupied on errors" intent.
With the ``lstat()``-based gate this raises and is caught as
occupied. Regression guard for the #27602 follow-up.
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "pairing"
legacy.mkdir()
# Populate the new path; it must NOT be selected.
new = tmp_path / "platforms" / "pairing"
new.mkdir(parents=True)
(new / "telegram-approved.json").write_text("[]")
real_lstat = Path.lstat
def boom(self):
if self == legacy:
raise PermissionError("simulated unreadable parent")
return real_lstat(self)
monkeypatch.setattr(Path, "lstat", boom)
result = get_hermes_dir("platforms/pairing", "pairing")
assert result == legacy
def test_dangling_legacy_symlink_returns_new(self, tmp_path, monkeypatch):
"""A dangling legacy symlink must NOT shadow populated new-layout data.
``lstat()`` reports the link itself (not its missing target), so the
helper must resolve the link and treat a broken target as absent
matching the old ``exists()`` gate, which followed the link and
returned False for a dangling one. Otherwise a stale broken symlink
would orphan real data (a stricter variant of the #27602 bug).
"""
self._set_home(tmp_path, monkeypatch)
legacy = tmp_path / "pairing"
legacy.symlink_to(tmp_path / "does-not-exist")
new = tmp_path / "platforms" / "pairing"
new.mkdir(parents=True)
(new / "discord-approved.json").write_text("[]")
result = get_hermes_dir("platforms/pairing", "pairing")
assert result == new
def test_symlink_to_populated_dir_returns_legacy(self, tmp_path, monkeypatch):
"""A legacy symlink pointing at a populated directory is honoured."""
self._set_home(tmp_path, monkeypatch)
real = tmp_path / "real_store"
real.mkdir()
(real / "cached.png").write_bytes(b"x")
legacy = tmp_path / "image_cache"
legacy.symlink_to(real)
result = get_hermes_dir("cache/images", "image_cache")
assert result == legacy
def test_symlink_to_empty_dir_returns_new(self, tmp_path, monkeypatch):
"""A legacy symlink pointing at an EMPTY directory falls through."""
self._set_home(tmp_path, monkeypatch)
empty = tmp_path / "empty_real"
empty.mkdir()
legacy = tmp_path / "audio_cache"
legacy.symlink_to(empty)
result = get_hermes_dir("cache/audio", "audio_cache")
assert result == tmp_path / "cache/audio"

View file

@ -400,12 +400,22 @@ class TestCacheDirectoryMounts:
assert mounts[0]["container_path"] == "/root/.hermes/cache/documents"
def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch):
"""Old-style dir names (e.g. document_cache) are resolved correctly."""
"""Old-style dir names (e.g. document_cache) are resolved correctly.
Populates the legacy dirs with a sentinel file so they count as
``has content`` for ``get_hermes_dir``'s populated-legacy check
(see #27602 — empty legacy stubs are no longer honoured).
"""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
# Use legacy dir name — get_hermes_dir prefers old if it exists
(hermes_home / "document_cache").mkdir()
(hermes_home / "image_cache").mkdir()
# Use legacy dir name with content — get_hermes_dir prefers
# populated old over new.
legacy_doc = hermes_home / "document_cache"
legacy_img = hermes_home / "image_cache"
legacy_doc.mkdir()
legacy_img.mkdir()
(legacy_doc / "cached.txt").write_bytes(b"x")
(legacy_img / "cached.png").write_bytes(b"x")
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
mounts = get_cache_directory_mounts()