refactor(skills): dedupe file-listing, share user-modified predicate, trim diff contract

Cleanup pass on the salvage (behavior-preserving):

- diff_bundled_skill now uses the existing _skill_file_list() helper
  instead of reimplementing the rglob/is_file/relative_to file-set
  enumeration inline (twice).
- Extract _is_tracked_user_modification(origin_hash, user_hash) and use
  it in BOTH the sync loop and list_user_modified_bundled_skills() so the
  'kept user edit' rule can't drift between the two sites.
- _read_text_for_diff -> _read_for_diff returns (bytes, text); the binary
  branch now compares the bytes it already read instead of re-reading
  both files from disk.
- Drop the unused 'user_present' key from diff_bundled_skill's return
  contract (no consumer or test ever read it).
- test_update_modified_notice: drop the brittle '>= 2 sites' count-floor
  so consolidating the two print paths into a shared helper stays a
  welcome refactor; keep the per-site 'count notice => discovery hint'
  invariant (still mutation-tested).
This commit is contained in:
kshitijk4poor 2026-06-18 12:41:55 +05:30
parent 737007e335
commit f6fac60e66
2 changed files with 48 additions and 32 deletions

View file

@ -32,19 +32,22 @@ def test_every_user_modified_notice_points_at_list_modified():
lines = _source_lines()
count_sites = [i for i, ln in enumerate(lines) if _COUNT_RE.search(ln)]
# There are at least two such notices today; the bug was that only one of
# them carried the discovery hint. Assert each is followed (within a small
# window — the count print + the hint print) by the list-modified pointer.
assert len(count_sites) >= 2, (
"expected at least two 'user-modified (kept)' notices in main.py; "
f"found {len(count_sites)}"
# The notice must exist somewhere (guard against it being deleted outright),
# but we deliberately do NOT assert a fixed *count* of sites: consolidating
# the duplicated print paths into a shared helper is a welcome refactor and
# must not fail this test. The invariant is per-site, not how many sites.
assert count_sites, (
"no 'user-modified (kept)' notice found in main.py — the update "
"summary that surfaces kept user edits appears to have been removed"
)
for idx in count_sites:
# The count print and its discovery hint sit on adjacent lines; allow a
# small window so wording/formatting tweaks don't break the check.
window = "\n".join(lines[idx : idx + 5])
assert _HINT_RE.search(window), (
"a 'user-modified (kept)' notice near line "
f"{idx + 1} of main.py does not point users at "
"`hermes skills list-modified` within the following lines — the "
"two update paths have drifted apart again:\n" + window
"update paths have drifted apart again:\n" + window
)

View file

@ -575,7 +575,7 @@ def sync_skills(quiet: bool = False) -> dict:
skipped += 1
continue
if user_hash != origin_hash:
if _is_tracked_user_modification(origin_hash, user_hash):
# User modified this skill — don't overwrite their changes
user_modified.append(skill_name)
if not quiet:
@ -785,6 +785,18 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict:
return {"ok": True, "action": action, "message": message, "synced": synced}
def _is_tracked_user_modification(origin_hash: str, user_hash: str) -> bool:
"""Whether an on-disk skill counts as a user modification ``hermes update`` keeps.
Shared by the sync loop (which decides what to skip) and
``list_user_modified_bundled_skills`` (which surfaces the names) so the two
can never drift. A skill is a tracked modification only when it has a
recorded origin hash (an un-baselined / v1 entry with an empty hash is not)
and its current content hash differs from that origin.
"""
return bool(origin_hash) and user_hash != origin_hash
def list_user_modified_bundled_skills() -> List[dict]:
"""Return the bundled skills that ``hermes update`` keeps because the user
edited them locally.
@ -806,7 +818,7 @@ def list_user_modified_bundled_skills() -> List[dict]:
bundled_dir = _get_bundled_dir()
modified: List[dict] = []
for skill_name, skill_dir in _discover_bundled_skills(bundled_dir):
origin_hash = manifest.get(skill_name)
origin_hash = manifest.get(skill_name, "")
# No entry, or a v1 entry not yet baselined (empty hash): not a tracked
# modification — the next sync handles it.
if not origin_hash:
@ -814,7 +826,7 @@ def list_user_modified_bundled_skills() -> List[dict]:
dest = _compute_relative_dest(skill_dir, bundled_dir)
if not dest.exists():
continue
if _dir_hash(dest) != origin_hash:
if _is_tracked_user_modification(origin_hash, _dir_hash(dest)):
modified.append(
{"name": skill_name, "dest": dest, "bundled_src": skill_dir}
)
@ -822,18 +834,23 @@ def list_user_modified_bundled_skills() -> List[dict]:
return modified
def _read_text_for_diff(path: Path) -> Optional[str]:
"""Return file text for diffing, or ``None`` if the file is binary/unreadable."""
def _read_for_diff(path: Path) -> Tuple[Optional[bytes], Optional[str]]:
"""Read a file once for diffing.
Returns ``(raw_bytes, text)`` where ``text`` is ``None`` if the file is
binary; ``(None, None)`` if it could not be read. Returning the raw bytes
lets the caller compare binary files without re-reading them.
"""
try:
data = path.read_bytes()
except OSError:
return None
return None, None
if b"\x00" in data:
return None
return data, None
try:
return data.decode("utf-8")
return data, data.decode("utf-8")
except UnicodeDecodeError:
return None
return data, None
def diff_bundled_skill(name: str) -> dict:
@ -844,7 +861,7 @@ def diff_bundled_skill(name: str) -> dict:
Returns a dict:
``ok`` (bool), ``name`` (str), ``found`` (bool bundled source exists),
``user_present`` (bool), ``modified`` (bool), ``message`` (str),
``modified`` (bool), ``message`` (str),
``diffs``: list of ``{"path": str, "status": str, "diff": str}`` where
status is one of ``modified`` / ``added`` (only in user copy) /
``removed`` (only in bundled) / ``binary``.
@ -859,7 +876,6 @@ def diff_bundled_skill(name: str) -> dict:
"ok": False,
"name": name,
"found": False,
"user_present": False,
"modified": False,
"diffs": [],
"message": (
@ -873,32 +889,30 @@ def diff_bundled_skill(name: str) -> dict:
"ok": False,
"name": name,
"found": True,
"user_present": False,
"modified": False,
"diffs": [],
"message": f"No local copy of '{name}' found at {dest}.",
}
user_files = {
p.relative_to(dest).as_posix() for p in dest.rglob("*") if p.is_file()
}
stock_files = {
p.relative_to(bundled_src).as_posix()
for p in bundled_src.rglob("*")
if p.is_file()
}
user_files = set(_skill_file_list(dest))
stock_files = set(_skill_file_list(bundled_src))
diffs: List[dict] = []
for rel in sorted(user_files | stock_files):
in_user = rel in user_files
in_stock = rel in stock_files
user_text = _read_text_for_diff(dest / rel) if in_user else None
stock_text = _read_text_for_diff(bundled_src / rel) if in_stock else None
user_bytes, user_text = (
_read_for_diff(dest / rel) if in_user else (None, None)
)
stock_bytes, stock_text = (
_read_for_diff(bundled_src / rel) if in_stock else (None, None)
)
if in_user and in_stock:
if user_text is None or stock_text is None:
# At least one side is binary — report only if bytes differ.
if (dest / rel).read_bytes() != (bundled_src / rel).read_bytes():
# At least one side is binary — report only if bytes differ
# (reuse the bytes already read above, no second read).
if user_bytes != stock_bytes:
diffs.append(
{"path": rel, "status": "binary", "diff": "<binary file differs>"}
)
@ -928,7 +942,6 @@ def diff_bundled_skill(name: str) -> dict:
"ok": True,
"name": name,
"found": True,
"user_present": True,
"modified": modified,
"diffs": diffs,
"message": (