fix(skills): make content_hash filename-sensitive too (symmetric with bundle_content_hash)

PR #6656 added rel_path + \x00 prefixing to ``bundle_content_hash`` so a
filename swap between two files in a bundle changes the digest. But it
only patched the in-memory side — ``content_hash`` in ``tools/skills_guard.py``
(the on-disk equivalent) still hashed file contents only.

These two functions need to stay symmetric: ``check_for_skill_updates``
compares the disk hash of an installed skill against the bundle hash
of the upstream copy. With the asymmetric fix, every clean install
showed as drifted because the digests no longer matched
(2 existing tests in ``test_skills_hub.py`` started failing as soon as
the contributor's change landed).

Apply the same ``rel_path + \x00 + content`` shape to the disk-side
function. Both functions now produce the same digest for the same skill
content laid out two ways. Documented the symmetry invariant in the
docstring so a future change to either function knows to touch both.

Also adds tests/tools/test_pr_6656_regressions.py with 10 regression
tests covering all three fixes salvaged in PR #6656:
  - uninstall_skill path traversal (4 cases: parent segments, absolute
    paths, symlink escape, legitimate skill)
  - bundle_content_hash filename swap detection (4 cases: in-memory
    swap, identity, disk-side swap, bundle↔disk symmetry)
  - list_pending lock contract (2 cases: source-grep contract, smoke)

Also fixes AUTHOR_MAP entry for @aaronlab — their commit email
(1115117931@qq.com) maps to "aaronagent" which isn't a real GitHub
login, so changelog @mentions would 404.
This commit is contained in:
Teknium 2026-05-22 19:55:58 -07:00
parent b82608a6f5
commit 3f78d8073c
3 changed files with 301 additions and 2 deletions

View file

@ -646,7 +646,7 @@ AUTHOR_MAP = {
"beibei1988@proton.me": "beibi9966",
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
# crossref, and GH contributor list matching (April 2026 audit) ──
"1115117931@qq.com": "aaronagent",
"1115117931@qq.com": "aaronlab",
"1506751656@qq.com": "hqhq1025",
"364939526@qq.com": "luyao618",
"hgk324@gmail.com": "houziershi",

View file

@ -0,0 +1,287 @@
"""Regression tests for PR #6656 — skill uninstall + bundle hash + pairing lock.
Three independent fixes that were salvaged together:
1. ``uninstall_skill`` path traversal: ``install_path`` comes from a JSON
file on disk; a malicious skill could write ``install_path: "../../"``
and trigger ``shutil.rmtree`` against parent directories. Guarded with
``Path.resolve().is_relative_to(SKILLS_DIR.resolve())``.
2. ``bundle_content_hash`` / ``content_hash`` filename inclusion: the
previous hash mixed only file CONTENTS, so swapping ``SKILL.md`` and
``scripts/run.sh`` contents between two paths produced the same digest.
Now both functions prefix each entry with ``rel_path + \\x00`` and
stay symmetric (one on disk, one on in-memory bundle).
3. ``PairingStore.list_pending`` TOCTOU: previously called
``_cleanup_expired`` (which writes the JSON file) without holding
``self._lock``, racing with ``generate_code`` / ``approve_code``.
"""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from tools.skills_hub import (
SkillBundle,
bundle_content_hash,
uninstall_skill,
)
from tools.skills_guard import content_hash
# =============================================================================
# uninstall_skill: path traversal guard
# =============================================================================
class TestUninstallPathTraversal:
"""The ``install_path`` field in ``lock.json`` is attacker-controllable
if a malicious skill is ever installed (or if the hub's lockfile is
corrupted). The uninstall path must refuse anything that resolves
outside ``SKILLS_DIR``.
"""
@pytest.fixture
def hub_setup(self, tmp_path, monkeypatch):
"""Build a hub directory tree with a malicious lock.json entry.
``HubLockFile`` binds its default ``path`` argument at def time
against the module-level ``LOCK_FILE`` constant, so monkey-patching
``LOCK_FILE`` alone is not enough we also need to rebind the
function default. Patching ``HubLockFile.__init__.__defaults__``
is the standard tool for this.
"""
import tools.skills_hub as hub
skills_dir = tmp_path / "skills"
hub_dir = skills_dir / ".hub"
hub_dir.mkdir(parents=True)
lock_path = hub_dir / "lock.json"
monkeypatch.setattr(hub, "SKILLS_DIR", skills_dir)
monkeypatch.setattr(hub, "HUB_DIR", hub_dir)
monkeypatch.setattr(hub, "LOCK_FILE", lock_path)
monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log")
# Rebind HubLockFile.__init__'s default `path=` arg so
# `HubLockFile()` (no args) picks up the new lock path.
monkeypatch.setattr(
hub.HubLockFile.__init__,
"__defaults__",
(lock_path,),
)
# A real directory outside skills_dir that the traversal would
# delete if the guard fails.
victim = tmp_path / "do-not-delete"
victim.mkdir()
(victim / "important.txt").write_text("data")
return skills_dir, hub_dir, victim
def _write_lock(self, hub_dir: Path, entries: dict) -> None:
lock_path = hub_dir / "lock.json"
lock_path.write_text(json.dumps({"version": 1, "installed": entries}))
def test_traversal_via_parent_segments_rejected(self, hub_setup):
"""install_path: "../do-not-delete" must NOT escape SKILLS_DIR."""
skills_dir, hub_dir, victim = hub_setup
self._write_lock(hub_dir, {
"evil": {
"install_path": "../do-not-delete",
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("evil")
assert ok is False
assert "outside" in msg or "resolves" in msg or "skills directory" in msg
# The victim directory MUST still exist.
assert victim.exists()
assert (victim / "important.txt").exists()
def test_absolute_path_rejected(self, hub_setup):
"""install_path that's an absolute path outside SKILLS_DIR must be refused."""
skills_dir, hub_dir, victim = hub_setup
self._write_lock(hub_dir, {
"evil": {
"install_path": str(victim),
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("evil")
# SKILLS_DIR / "<absolute>" still results in an absolute path,
# which when resolved is outside skills_dir. Must be refused.
assert ok is False
assert victim.exists()
def test_symlink_escape_rejected(self, tmp_path, hub_setup):
"""Symlinks inside SKILLS_DIR that point outside must be refused
after realpath resolution."""
skills_dir, hub_dir, victim = hub_setup
# Create a "skill" that's actually a symlink to victim
evil_link = skills_dir / "trapdoor"
evil_link.symlink_to(victim)
self._write_lock(hub_dir, {
"trap": {
"install_path": "trapdoor",
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("trap")
# realpath resolves the symlink → outside skills_dir → refused.
assert ok is False
assert victim.exists()
assert (victim / "important.txt").exists()
def test_legitimate_skill_uninstall_still_works(self, hub_setup):
"""The guard must NOT block a normal skill directory inside SKILLS_DIR."""
skills_dir, hub_dir, _victim = hub_setup
legit = skills_dir / "category" / "my-skill"
legit.mkdir(parents=True)
(legit / "SKILL.md").write_text("test")
self._write_lock(hub_dir, {
"my-skill": {
"install_path": "category/my-skill",
"source": "https://example.com",
"trust_level": "community",
"version": "1.0",
},
})
ok, msg = uninstall_skill("my-skill")
assert ok is True
assert not legit.exists()
# =============================================================================
# Bundle / disk hash symmetry + filename inclusion
# =============================================================================
class TestBundleHashFilenameSensitivity:
"""Hashes must change when filenames are swapped, even if combined
contents stay identical. ``bundle_content_hash`` (in-memory) and
``content_hash`` (on-disk) must stay symmetric they're used to
detect skill drift between an installed bundle and its source.
"""
def _make_bundle(self, files: dict) -> SkillBundle:
return SkillBundle(
name="test",
files=files,
source="test",
identifier="test/test",
trust_level="community",
)
def test_filename_swap_changes_hash(self):
"""Swapping content between SKILL.md and scripts/run.sh must
produce a different hash. Without the filename in the hash,
these two bundles would have looked identical."""
a = self._make_bundle({"SKILL.md": "hello", "scripts/run.sh": "world"})
b = self._make_bundle({"SKILL.md": "world", "scripts/run.sh": "hello"})
assert bundle_content_hash(a) != bundle_content_hash(b)
def test_identical_bundles_same_hash(self):
"""Sanity: equal content + paths = equal hash."""
a = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
b = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
assert bundle_content_hash(a) == bundle_content_hash(b)
def test_disk_hash_changes_on_filename_swap(self, tmp_path):
"""``content_hash`` on disk must also be filename-sensitive,
so it stays symmetric with ``bundle_content_hash``."""
skill_a = tmp_path / "a"
skill_a.mkdir()
(skill_a / "SKILL.md").write_text("hello")
(skill_a / "run.sh").write_text("world")
skill_b = tmp_path / "b"
skill_b.mkdir()
(skill_b / "SKILL.md").write_text("world")
(skill_b / "run.sh").write_text("hello")
# Different filename↔content mappings = different hashes.
assert content_hash(skill_a) != content_hash(skill_b)
def test_bundle_and_disk_hash_match(self, tmp_path):
"""Symmetry contract: the same skill, expressed as a SkillBundle
and as a directory tree, must produce the same digest. If this
fails, ``check_for_skill_updates`` will flag every clean
install as drifted."""
skill_dir = tmp_path / "skill"
skill_dir.mkdir()
(skill_dir / "SKILL.md").write_text("hello")
(skill_dir / "scripts").mkdir()
(skill_dir / "scripts" / "run.sh").write_text("world")
bundle = self._make_bundle({
"SKILL.md": "hello",
"scripts/run.sh": "world",
})
assert bundle_content_hash(bundle) == content_hash(skill_dir)
# =============================================================================
# PairingStore.list_pending: must hold the lock
# =============================================================================
class TestListPendingLock:
"""list_pending writes via _cleanup_expired. Without the lock,
a concurrent generate_code or approve_code can race against the
write, potentially clobbering a pending approval."""
def test_list_pending_acquires_lock(self, tmp_path):
"""Source-grep contract: ``list_pending`` body must be wrapped
in ``with self._lock:``. If anyone unwraps it again, the TOCTOU
bug returns."""
import gateway.pairing as _pairing_mod
source = Path(_pairing_mod.__file__).read_text(encoding="utf-8")
# Find the list_pending function body and assert the lock
# context manager appears inside it. We grep the function
# source rather than runtime-introspect because the racy
# behaviour is hard to deterministically reproduce in a test.
lines = source.splitlines()
in_func = False
seen_lock = False
for line in lines:
if line.startswith(" def list_pending("):
in_func = True
continue
if in_func:
if line.startswith(" def "):
break # next function
if "with self._lock:" in line:
seen_lock = True
break
assert seen_lock, (
"list_pending must wrap its body in `with self._lock:` — "
"without it, _cleanup_expired's file write races with "
"concurrent generate_code/approve_code."
)
def test_list_pending_returns_correct_data(self, tmp_path):
"""End-to-end smoke: even with the lock held, basic operation works."""
from gateway.pairing import PairingStore
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
store = PairingStore()
store.generate_code("telegram", "user1", "Alice")
pending = store.list_pending("telegram")
assert len(pending) == 1
assert pending[0]["user_id"] == "user1"

View file

@ -717,12 +717,24 @@ def format_scan_report(result: ScanResult) -> str:
def content_hash(skill_path: Path) -> str:
"""Compute a SHA-256 hash of all files in a skill directory for integrity tracking."""
"""Compute a SHA-256 hash of all files in a skill directory for integrity tracking.
File paths (relative to ``skill_path``) are mixed into the hash alongside
file contents so that swapping the contents of two files in a skill
changes the hash. This must stay symmetric with
``tools.skills_hub.bundle_content_hash`` both functions need to
produce the same digest for the same skill (one operates on disk,
one on an in-memory bundle), so any change to the hash shape MUST
land in both places at once.
"""
h = hashlib.sha256()
if skill_path.is_dir():
for f in sorted(skill_path.rglob("*")):
if f.is_file():
try:
rel = f.relative_to(skill_path).as_posix()
h.update(rel.encode("utf-8"))
h.update(b"\x00")
h.update(f.read_bytes())
except OSError:
continue