From f6fac60e662ee14d95842a4eb21d6ce575417956 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 18 Jun 2026 12:41:55 +0530 Subject: [PATCH] refactor(skills): dedupe file-listing, share user-modified predicate, trim diff contract Cleanup pass on the salvage (behavior-preserving): - diff_bundled_skill now uses the existing _skill_file_list() helper instead of reimplementing the rglob/is_file/relative_to file-set enumeration inline (twice). - Extract _is_tracked_user_modification(origin_hash, user_hash) and use it in BOTH the sync loop and list_user_modified_bundled_skills() so the 'kept user edit' rule can't drift between the two sites. - _read_text_for_diff -> _read_for_diff returns (bytes, text); the binary branch now compares the bytes it already read instead of re-reading both files from disk. - Drop the unused 'user_present' key from diff_bundled_skill's return contract (no consumer or test ever read it). - test_update_modified_notice: drop the brittle '>= 2 sites' count-floor so consolidating the two print paths into a shared helper stays a welcome refactor; keep the per-site 'count notice => discovery hint' invariant (still mutation-tested). --- .../hermes_cli/test_update_modified_notice.py | 17 ++--- tools/skills_sync.py | 63 +++++++++++-------- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/tests/hermes_cli/test_update_modified_notice.py b/tests/hermes_cli/test_update_modified_notice.py index 37e41c73be4..f2980206985 100644 --- a/tests/hermes_cli/test_update_modified_notice.py +++ b/tests/hermes_cli/test_update_modified_notice.py @@ -32,19 +32,22 @@ def test_every_user_modified_notice_points_at_list_modified(): lines = _source_lines() count_sites = [i for i, ln in enumerate(lines) if _COUNT_RE.search(ln)] - # There are at least two such notices today; the bug was that only one of - # them carried the discovery hint. Assert each is followed (within a small - # window — the count print + the hint print) by the list-modified pointer. - assert len(count_sites) >= 2, ( - "expected at least two 'user-modified (kept)' notices in main.py; " - f"found {len(count_sites)}" + # The notice must exist somewhere (guard against it being deleted outright), + # but we deliberately do NOT assert a fixed *count* of sites: consolidating + # the duplicated print paths into a shared helper is a welcome refactor and + # must not fail this test. The invariant is per-site, not how many sites. + assert count_sites, ( + "no 'user-modified (kept)' notice found in main.py — the update " + "summary that surfaces kept user edits appears to have been removed" ) for idx in count_sites: + # The count print and its discovery hint sit on adjacent lines; allow a + # small window so wording/formatting tweaks don't break the check. window = "\n".join(lines[idx : idx + 5]) assert _HINT_RE.search(window), ( "a 'user-modified (kept)' notice near line " f"{idx + 1} of main.py does not point users at " "`hermes skills list-modified` within the following lines — the " - "two update paths have drifted apart again:\n" + window + "update paths have drifted apart again:\n" + window ) diff --git a/tools/skills_sync.py b/tools/skills_sync.py index 548f0dbb1db..091c56e2d4e 100644 --- a/tools/skills_sync.py +++ b/tools/skills_sync.py @@ -575,7 +575,7 @@ def sync_skills(quiet: bool = False) -> dict: skipped += 1 continue - if user_hash != origin_hash: + if _is_tracked_user_modification(origin_hash, user_hash): # User modified this skill — don't overwrite their changes user_modified.append(skill_name) if not quiet: @@ -785,6 +785,18 @@ def reset_bundled_skill(name: str, restore: bool = False) -> dict: return {"ok": True, "action": action, "message": message, "synced": synced} +def _is_tracked_user_modification(origin_hash: str, user_hash: str) -> bool: + """Whether an on-disk skill counts as a user modification ``hermes update`` keeps. + + Shared by the sync loop (which decides what to skip) and + ``list_user_modified_bundled_skills`` (which surfaces the names) so the two + can never drift. A skill is a tracked modification only when it has a + recorded origin hash (an un-baselined / v1 entry with an empty hash is not) + and its current content hash differs from that origin. + """ + return bool(origin_hash) and user_hash != origin_hash + + def list_user_modified_bundled_skills() -> List[dict]: """Return the bundled skills that ``hermes update`` keeps because the user edited them locally. @@ -806,7 +818,7 @@ def list_user_modified_bundled_skills() -> List[dict]: bundled_dir = _get_bundled_dir() modified: List[dict] = [] for skill_name, skill_dir in _discover_bundled_skills(bundled_dir): - origin_hash = manifest.get(skill_name) + origin_hash = manifest.get(skill_name, "") # No entry, or a v1 entry not yet baselined (empty hash): not a tracked # modification — the next sync handles it. if not origin_hash: @@ -814,7 +826,7 @@ def list_user_modified_bundled_skills() -> List[dict]: dest = _compute_relative_dest(skill_dir, bundled_dir) if not dest.exists(): continue - if _dir_hash(dest) != origin_hash: + if _is_tracked_user_modification(origin_hash, _dir_hash(dest)): modified.append( {"name": skill_name, "dest": dest, "bundled_src": skill_dir} ) @@ -822,18 +834,23 @@ def list_user_modified_bundled_skills() -> List[dict]: return modified -def _read_text_for_diff(path: Path) -> Optional[str]: - """Return file text for diffing, or ``None`` if the file is binary/unreadable.""" +def _read_for_diff(path: Path) -> Tuple[Optional[bytes], Optional[str]]: + """Read a file once for diffing. + + Returns ``(raw_bytes, text)`` where ``text`` is ``None`` if the file is + binary; ``(None, None)`` if it could not be read. Returning the raw bytes + lets the caller compare binary files without re-reading them. + """ try: data = path.read_bytes() except OSError: - return None + return None, None if b"\x00" in data: - return None + return data, None try: - return data.decode("utf-8") + return data, data.decode("utf-8") except UnicodeDecodeError: - return None + return data, None def diff_bundled_skill(name: str) -> dict: @@ -844,7 +861,7 @@ def diff_bundled_skill(name: str) -> dict: Returns a dict: ``ok`` (bool), ``name`` (str), ``found`` (bool — bundled source exists), - ``user_present`` (bool), ``modified`` (bool), ``message`` (str), + ``modified`` (bool), ``message`` (str), ``diffs``: list of ``{"path": str, "status": str, "diff": str}`` where status is one of ``modified`` / ``added`` (only in user copy) / ``removed`` (only in bundled) / ``binary``. @@ -859,7 +876,6 @@ def diff_bundled_skill(name: str) -> dict: "ok": False, "name": name, "found": False, - "user_present": False, "modified": False, "diffs": [], "message": ( @@ -873,32 +889,30 @@ def diff_bundled_skill(name: str) -> dict: "ok": False, "name": name, "found": True, - "user_present": False, "modified": False, "diffs": [], "message": f"No local copy of '{name}' found at {dest}.", } - user_files = { - p.relative_to(dest).as_posix() for p in dest.rglob("*") if p.is_file() - } - stock_files = { - p.relative_to(bundled_src).as_posix() - for p in bundled_src.rglob("*") - if p.is_file() - } + user_files = set(_skill_file_list(dest)) + stock_files = set(_skill_file_list(bundled_src)) diffs: List[dict] = [] for rel in sorted(user_files | stock_files): in_user = rel in user_files in_stock = rel in stock_files - user_text = _read_text_for_diff(dest / rel) if in_user else None - stock_text = _read_text_for_diff(bundled_src / rel) if in_stock else None + user_bytes, user_text = ( + _read_for_diff(dest / rel) if in_user else (None, None) + ) + stock_bytes, stock_text = ( + _read_for_diff(bundled_src / rel) if in_stock else (None, None) + ) if in_user and in_stock: if user_text is None or stock_text is None: - # At least one side is binary — report only if bytes differ. - if (dest / rel).read_bytes() != (bundled_src / rel).read_bytes(): + # At least one side is binary — report only if bytes differ + # (reuse the bytes already read above, no second read). + if user_bytes != stock_bytes: diffs.append( {"path": rel, "status": "binary", "diff": ""} ) @@ -928,7 +942,6 @@ def diff_bundled_skill(name: str) -> dict: "ok": True, "name": name, "found": True, - "user_present": True, "modified": modified, "diffs": diffs, "message": (