refactor(skills): slim AST diagnostic to single entry point

Trim ~600 LOC off the original contribution while keeping the same
operator-facing surface and detection coverage.

- Collapse three entry points (file / dir / bundle) into one
  ast_scan_path(path) that handles both files and directories.
- Drop AstFinding dataclass + severity field — replaced with plain
  (file, line, pattern_id, description) tuples. Severity ordering was
  display-only for a diagnostic that explicitly disclaims security
  verdicts, so the field added bookkeeping without earning its place.
- Replace Rich-markup formatter with plain text grouped by file.
- Drop the 'inspect --ast-deep' surface — same scanner, same output as
  'audit --deep', single CLI entry is enough. Operators audit after
  install; pre-install inspection signal isn't worth the second surface.
- Trim test file to the cases that earn their place: bypass payload,
  syntax error survival, RecursionError survival, false-positive guard
  (importer lookalike), literal-arg false-positive guard, non-.py
  ignored, directory recursion + cache-dir skipping, missing-path,
  getattr/__dict__ detection, formatter empty + populated.

Net: tools/skills_ast_audit.py 353 -> 133 LOC,
tests/tools/test_skills_ast_audit.py 299 -> 103 LOC, full diff
+704/-12 -> +264/-6. No change to tools/skills_guard.py — Skills Guard
verdicts remain untouched per SECURITY.md §2.4.
This commit is contained in:
teknium1 2026-05-23 16:36:37 -07:00 committed by Teknium
parent 7255050c99
commit 4254f7dd17
4 changed files with 175 additions and 609 deletions

View file

@ -1,299 +1,103 @@
"""
Tests for tools.skills_ast_audit — the opt-in AST diagnostic scanner.
These tests verify detection of dynamic import/access patterns that can
bypass line-by-line regex scanning, without crashing on hostile or
pathological input.
"""
"""Tests for tools.skills_ast_audit — opt-in AST diagnostic scanner."""
import sys
from pathlib import Path
from tools.skills_ast_audit import (
AstFinding,
ast_scan_bundle_files,
ast_scan_file,
ast_scan_skill,
format_ast_report,
)
from tools.skills_ast_audit import ast_scan_path, format_ast_report
# ---------------------------------------------------------------------------
# Core detection tests
# ---------------------------------------------------------------------------
def _pids(findings):
return [pid for (_f, _l, pid, _d) in findings]
class TestAstScanPython:
    """Per-file checks: ast_scan_file reports dynamic import/access pattern ids."""

    def test_importlib_import_module_detected(self, tmp_path):
        """An importlib.import_module() call yields both importlib-related ids."""
        target = tmp_path / "evil.py"
        target.write_text("import importlib\nm = importlib.import_module('os')\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_dynamic_import" in reported
        assert "ast_importlib_import" in reported

    def test_importlib_submodule_import_detected(self, tmp_path):
        """Importing an importlib submodule (`import importlib.util`) is reported."""
        target = tmp_path / "evil.py"
        target.write_text("import importlib.util\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" in reported

    def test_importlib_submodule_aliased_import_detected(self, tmp_path):
        """An aliased submodule import (`import importlib.machinery as m`) is reported."""
        target = tmp_path / "evil.py"
        target.write_text("import importlib.machinery as m\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" in reported

    def test_from_importlib_import_detected(self, tmp_path):
        """`from importlib import import_module` is reported."""
        target = tmp_path / "evil.py"
        target.write_text("from importlib import import_module\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" in reported

    def test_from_importlib_submodule_import_detected(self, tmp_path):
        """`from importlib.util import find_spec` is reported."""
        target = tmp_path / "evil.py"
        target.write_text("from importlib.util import find_spec\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" in reported

    def test_importer_lookalike_not_flagged(self, tmp_path):
        """`import importer` shares a prefix with importlib but must not be reported."""
        target = tmp_path / "ok.py"
        target.write_text("import importer\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" not in reported

    def test_from_importer_lookalike_not_flagged(self, tmp_path):
        """`from importer import something` must not trip the importlib check."""
        target = tmp_path / "ok.py"
        target.write_text("from importer import something\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_importlib_import" not in reported

    def test_dunder_import_with_computed_arg_detected(self, tmp_path):
        """__import__ called with a variable (non-literal) argument is reported."""
        target = tmp_path / "evil.py"
        target.write_text("name = 'os'\nm = __import__(name)\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_dynamic_import_computed" in reported

    def test_dunder_dict_computed_key_detected(self, tmp_path):
        """Subscripting __dict__ with a computed key is reported."""
        target = tmp_path / "evil.py"
        target.write_text("key = 'environ'\nval = obj.__dict__[key]\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_dict_access" in reported

    def test_getattr_with_computed_name_detected(self, tmp_path):
        """getattr() with a computed attribute name is reported."""
        target = tmp_path / "evil.py"
        target.write_text("name = 'system'\nfn = getattr(os, name)\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_dynamic_getattr" in reported

    def test_syntax_error_handled_gracefully(self, tmp_path):
        """A file that fails to parse still yields a list instead of raising."""
        target = tmp_path / "bad.py"
        target.write_text("def broken(\n")
        outcome = ast_scan_file(target)
        assert isinstance(outcome, list)

    def test_literal_dunder_import_not_flagged_by_ast(self, tmp_path):
        """__import__('os') with a string literal is not reported as computed."""
        target = tmp_path / "ok.py"
        target.write_text("m = __import__('os')\n")
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        assert "ast_dynamic_import_computed" not in reported

    def test_full_bypass_payload_now_detected(self, tmp_path):
        """The bypass payload from #7072 triggers all three expected ids."""
        payload = """
import importlib
parts = ['o', 's']
m = importlib.import_module(''.join(parts))
e = m.__dict__[''.join(['e','n','v','i','r','o','n'])]
"""
        target = tmp_path / "exfil.py"
        target.write_text(payload)
        reported = [finding.pattern_id for finding in ast_scan_file(target)]
        for expected in ("ast_dynamic_import", "ast_dict_access", "ast_importlib_import"):
            assert expected in reported

    def test_non_python_files_return_empty(self, tmp_path):
        """A non-.py file yields no findings even when it contains import text."""
        target = tmp_path / "script.sh"
        target.write_text("import importlib\nimportlib.import_module('os')\n")
        assert ast_scan_file(target) == []

    def test_scan_handles_recursion_error_gracefully(self, tmp_path):
        """Deeply nested expressions must not crash the scan — a list is
        still returned when the recursion limit is lowered to 200."""
        target = tmp_path / "deep.py"
        target.write_text("a" + ".x" * 5000 + "\n")
        saved_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(200)
        try:
            outcome = ast_scan_file(target)
        finally:
            # Always restore the interpreter-wide limit for later tests.
            sys.setrecursionlimit(saved_limit)
        assert isinstance(outcome, list)
def test_bypass_payload_detected(tmp_path):
    """The bypass shape from #7072 produces all three expected pattern ids."""
    source_lines = (
        "import importlib\n",
        "parts = ['o', 's']\n",
        "m = importlib.import_module(''.join(parts))\n",
        "e = m.__dict__[''.join(['e','n','v'])]\n",
    )
    target = tmp_path / "exfil.py"
    target.write_text("".join(source_lines))
    reported = _pids(ast_scan_path(target))
    for expected in ("dynamic_import", "importlib_import", "dict_access"):
        assert expected in reported
# ---------------------------------------------------------------------------
# Directory scanner tests
# ---------------------------------------------------------------------------
def test_syntax_error_does_not_crash(tmp_path):
    """An unparseable .py file yields an empty result rather than raising."""
    broken = tmp_path / "bad.py"
    broken.write_text("def broken(\n")
    outcome = ast_scan_path(broken)
    assert outcome == []
class TestAstScanSkill:
    """Directory-level scanning through ast_scan_skill()."""

    def test_scans_all_py_files_in_tree(self, tmp_path):
        """Both a top-level and a nested .py file contribute one finding each."""
        skill = tmp_path / "my-skill"
        skill.mkdir()
        sub = skill / "subpkg"
        sub.mkdir()
        (skill / "main.py").write_text("import importlib\n")
        (sub / "utils.py").write_text("import importlib.util\n")
        pids = [finding.pattern_id for finding in ast_scan_skill(skill)]
        # One importlib finding per file.
        assert pids.count("ast_importlib_import") == 2

    def test_skips_ignored_dirs(self, tmp_path):
        """Files under __pycache__, venv, .venv and node_modules produce nothing."""
        skill = tmp_path / "my-skill"
        skill.mkdir()
        for dirname in ("__pycache__", "venv", ".venv", "node_modules"):
            ignored = skill / dirname
            ignored.mkdir()
            (ignored / "cached.py").write_text("import importlib\n")
        assert ast_scan_skill(skill) == []

    def test_skips_non_existent_dir(self, tmp_path):
        """A directory that does not exist yields an empty list."""
        # Use a path under tmp_path that is never created, so the test does
        # not depend on what happens to exist at an absolute host path.
        missing = tmp_path / "nonexistent" / "skill" / "path"
        assert ast_scan_skill(missing) == []

    def test_non_dir_path(self, tmp_path):
        """Passing a regular file instead of a directory yields an empty list."""
        not_a_dir = tmp_path / "not_a_dir.py"
        not_a_dir.write_text("import importlib\n")
        assert ast_scan_skill(not_a_dir) == []
def test_recursion_error_does_not_crash(tmp_path):
    """A 5000-deep attribute chain scanned under a recursion limit of 200
    still returns a list."""
    deep_file = tmp_path / "deep.py"
    deep_source = "a" + ".x" * 5000 + "\n"
    deep_file.write_text(deep_source)
    saved_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(200)
    try:
        outcome = ast_scan_path(deep_file)
    finally:
        # Restore the interpreter-wide limit even if the scan raises.
        sys.setrecursionlimit(saved_limit)
    assert isinstance(outcome, list)
class TestAstScanBundleFiles:
    """Scanning of in-memory {path: content} bundles via ast_scan_bundle_files()."""

    def test_scans_python_files_from_bundle(self):
        """Only the .py entry of a mixed bundle produces a finding."""
        bundle = {
            "SKILL.md": "---\nname: test\n---\n",
            "scripts/run.py": "import importlib\n",
            "references/readme.md": "import importlib\n",
        }
        findings = ast_scan_bundle_files(bundle)
        reported = [finding.pattern_id for finding in findings]
        assert reported == ["ast_importlib_import"]
        assert findings[0].file == "scripts/run.py"

    def test_decodes_bytes_bundle_content(self):
        """A bundle entry supplied as bytes is scanned like text."""
        bundle = {
            "scripts/run.py": b"from importlib.util import find_spec\n",
        }
        reported = [finding.pattern_id for finding in ast_scan_bundle_files(bundle)]
        assert reported == ["ast_importlib_import"]

    def test_skips_bundle_cache_dirs(self):
        """Entries under venv/ and __pycache__/ produce no findings."""
        bundle = {
            "venv/lib/run.py": "import importlib\n",
            "__pycache__/cached.py": "import importlib\n",
        }
        assert ast_scan_bundle_files(bundle) == []
def test_importer_lookalike_not_flagged(tmp_path):
    """A module named `importer` shares a prefix with importlib but must
    produce no findings, for both import forms."""
    lookalike = tmp_path / "ok.py"
    lookalike.write_text("import importer\nfrom importer import x\n")
    reported = _pids(ast_scan_path(lookalike))
    assert reported == []
# ---------------------------------------------------------------------------
# Report formatting tests
# ---------------------------------------------------------------------------
def test_literal_dunder_import_not_flagged(tmp_path):
    """__import__ with a string literal argument is not reported as a
    computed dynamic import."""
    literal_case = tmp_path / "ok.py"
    literal_case.write_text("m = __import__('os')\n")
    reported = _pids(ast_scan_path(literal_case))
    assert "dynamic_import_computed" not in reported
class TestFormatAstReport:
"""Rich report formatting."""
def test_non_python_file_returns_empty(tmp_path):
    """A file without a .py suffix yields no findings despite import text."""
    shell_script = tmp_path / "script.sh"
    shell_script.write_text("import importlib\n")
    outcome = ast_scan_path(shell_script)
    assert outcome == []
def test_empty_findings(self):
    """With no findings, the report carries the 'nothing detected' message."""
    rendered = format_ast_report([])
    expected_message = "No AST-level patterns detected"
    assert expected_message in rendered
def test_empty_with_skill_name(self):
    """An empty report still mentions the skill name it was given."""
    rendered = format_ast_report([], skill_name="test-skill")
    for fragment in ("test-skill", "No AST-level patterns detected"):
        assert fragment in rendered
def test_directory_scans_recursively_and_skips_cache_dirs(tmp_path):
    """Scanning a directory finds the two real .py files (top-level and
    nested) and counts nothing from the cache/venv-style directories."""
    root = tmp_path / "s"
    root.mkdir()
    (root / "main.py").write_text("import importlib\n")
    nested = root / "sub"
    nested.mkdir()
    (nested / "u.py").write_text("from importlib.util import find_spec\n")

    # Each of these holds a file that would be flagged if it were scanned.
    for skipped_name in ("__pycache__", ".venv", "venv", "node_modules"):
        skipped_dir = root / skipped_name
        skipped_dir.mkdir()
        (skipped_dir / "junk.py").write_text("import importlib\n")

    reported = _pids(ast_scan_path(root))
    assert reported.count("importlib_import") == 2
def test_findings_grouped_by_file(self):
    """Two findings for one file: the report shows the file name, both
    descriptions, a '2 finding' summary and the diagnostic-hint note."""
    shared = {"category": "obfuscation", "file": "main.py"}
    import_finding = AstFinding(
        pattern_id="ast_importlib_import",
        severity="medium",
        line=1,
        match="import importlib",
        description="importlib imported",
        **shared,
    )
    dynamic_finding = AstFinding(
        pattern_id="ast_dynamic_import",
        severity="high",
        line=3,
        match="importlib.import_module()",
        description="dynamic import via importlib",
        **shared,
    )
    rendered = format_ast_report([import_finding, dynamic_finding])
    expected_fragments = (
        "main.py",
        "importlib imported",
        "dynamic import via importlib",
        "2 finding",  # summary line
        "Note: AST findings are diagnostic hints",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_severity_summary(self):
    """Two high and one medium finding show up as per-severity counts."""
    rows = (("id1", "high", 1), ("id2", "high", 2), ("id3", "medium", 3))
    findings = [
        AstFinding(pattern_id, severity, "x", "f.py", lineno, "m", "desc")
        for pattern_id, severity, lineno in rows
    ]
    rendered = format_ast_report(findings)
    assert "2 high" in rendered
    assert "1 medium" in rendered
def test_missing_path_returns_empty(tmp_path):
    """A path that was never created yields an empty result."""
    absent = tmp_path / "does_not_exist"
    outcome = ast_scan_path(absent)
    assert outcome == []
def test_dynamic_getattr_and_dict_access_detected(tmp_path):
    """getattr() and __dict__[...] with a variable name are both reported."""
    sample = tmp_path / "g.py"
    sample.write_text("name = 'x'\nv = getattr(o, name)\nv = o.__dict__[name]\n")
    reported = _pids(ast_scan_path(sample))
    for expected in ("dynamic_getattr", "dict_access"):
        assert expected in reported
def test_format_report_empty():
    """Formatting an empty findings list produces text containing 'No dynamic'."""
    rendered = format_ast_report([])
    assert "No dynamic" in rendered
def test_format_report_with_findings():
    """A populated report names the skill and file, shows each finding's
    line marker, and includes the diagnostic-hints note."""
    findings = [
        ("a.py", 1, "importlib_import", "import importlib — ..."),
        ("a.py", 3, "dynamic_import", "importlib.import_module() — ..."),
    ]
    out = format_ast_report(findings, skill_name="test")
    # One assert per fragment, so a failure names the missing piece instead
    # of reporting an opaque `a and b and c and d` as False.
    assert "test" in out
    assert "a.py" in out
    assert "L1" in out
    assert "L3" in out
    assert "diagnostic hints" in out