mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
refactor(skills): slim AST diagnostic to single entry point
Trim ~600 LOC off the original contribution while keeping the same operator-facing surface and detection coverage.

- Collapse three entry points (file / dir / bundle) into one ast_scan_path(path) that handles both files and directories.
- Drop AstFinding dataclass + severity field — replaced with plain (file, line, pattern_id, description) tuples. Severity ordering was display-only for a diagnostic that explicitly disclaims security verdicts, so the field added bookkeeping without earning its place.
- Replace Rich-markup formatter with plain text grouped by file.
- Drop the 'inspect --ast-deep' surface — same scanner, same output as 'audit --deep', single CLI entry is enough. Operators audit after install; pre-install inspection signal isn't worth the second surface.
- Trim test file to the cases that earn their place: bypass payload, syntax error survival, RecursionError survival, false-positive guard (importer lookalike), literal-arg false-positive guard, non-.py ignored, directory recursion + cache-dir skipping, missing-path, getattr/__dict__ detection, formatter empty + populated.

Net: tools/skills_ast_audit.py 353 -> 133 LOC, tests/tools/test_skills_ast_audit.py 299 -> 103 LOC, full diff +704/-12 -> +264/-6. No change to tools/skills_guard.py — Skills Guard verdicts remain untouched per SECURITY.md §2.4.
This commit is contained in:
parent
7255050c99
commit
4254f7dd17
4 changed files with 175 additions and 609 deletions
|
|
@ -1,299 +1,103 @@
|
|||
"""
|
||||
Tests for tools.skills_ast_audit — the opt-in AST diagnostic scanner.
|
||||
|
||||
These tests verify detection of dynamic import/access patterns that can
|
||||
bypass line-by-line regex scanning, without crashing on hostile or
|
||||
pathological input.
|
||||
"""
|
||||
"""Tests for tools.skills_ast_audit — opt-in AST diagnostic scanner."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from tools.skills_ast_audit import (
|
||||
AstFinding,
|
||||
ast_scan_bundle_files,
|
||||
ast_scan_file,
|
||||
ast_scan_skill,
|
||||
format_ast_report,
|
||||
)
|
||||
from tools.skills_ast_audit import ast_scan_path, format_ast_report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core detection tests
|
||||
# ---------------------------------------------------------------------------
|
||||
def _pids(findings):
|
||||
return [pid for (_f, _l, pid, _d) in findings]
|
||||
|
||||
|
||||
class TestAstScanPython:
|
||||
"""AST scanner detects dynamic import and access patterns."""
|
||||
|
||||
def test_importlib_import_module_detected(self, tmp_path):
|
||||
"""importlib.import_module() calls are flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("import importlib\nm = importlib.import_module('os')\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dynamic_import" in pids
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_importlib_submodule_import_detected(self, tmp_path):
|
||||
"""`import importlib.util` and similar submodules are flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("import importlib.util\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_importlib_submodule_aliased_import_detected(self, tmp_path):
|
||||
"""`import importlib.machinery as m` (aliased submodule) is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("import importlib.machinery as m\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_from_importlib_import_detected(self, tmp_path):
|
||||
"""`from importlib import import_module` is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("from importlib import import_module\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_from_importlib_submodule_import_detected(self, tmp_path):
|
||||
"""`from importlib.util import find_spec` is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("from importlib.util import find_spec\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_importer_lookalike_not_flagged(self, tmp_path):
|
||||
"""`import importer` must NOT match — prefix check is dot-bounded."""
|
||||
f = tmp_path / "ok.py"
|
||||
f.write_text("import importer\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" not in pids
|
||||
|
||||
def test_from_importer_lookalike_not_flagged(self, tmp_path):
|
||||
"""`from importer import something` must NOT match the importlib check."""
|
||||
f = tmp_path / "ok.py"
|
||||
f.write_text("from importer import something\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_importlib_import" not in pids
|
||||
|
||||
def test_dunder_import_with_computed_arg_detected(self, tmp_path):
|
||||
"""__import__ with non-literal argument is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("name = 'os'\nm = __import__(name)\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dynamic_import_computed" in pids
|
||||
|
||||
def test_dunder_dict_computed_key_detected(self, tmp_path):
|
||||
"""__dict__[<computed>] access is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("key = 'environ'\nval = obj.__dict__[key]\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dict_access" in pids
|
||||
|
||||
def test_getattr_with_computed_name_detected(self, tmp_path):
|
||||
"""getattr(obj, computed_name) is flagged."""
|
||||
f = tmp_path / "evil.py"
|
||||
f.write_text("name = 'system'\nfn = getattr(os, name)\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dynamic_getattr" in pids
|
||||
|
||||
def test_syntax_error_handled_gracefully(self, tmp_path):
|
||||
"""Files with syntax errors should not crash the scanner."""
|
||||
f = tmp_path / "bad.py"
|
||||
f.write_text("def broken(\n")
|
||||
findings = ast_scan_file(f)
|
||||
assert isinstance(findings, list)
|
||||
|
||||
def test_literal_dunder_import_not_flagged_by_ast(self, tmp_path):
|
||||
"""__import__('os') with literal string is NOT flagged by AST."""
|
||||
f = tmp_path / "ok.py"
|
||||
f.write_text("m = __import__('os')\n")
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dynamic_import_computed" not in pids
|
||||
|
||||
def test_full_bypass_payload_now_detected(self, tmp_path):
|
||||
"""The exact bypass payload from #7072 should now be caught."""
|
||||
payload = """
|
||||
import importlib
|
||||
parts = ['o', 's']
|
||||
m = importlib.import_module(''.join(parts))
|
||||
e = m.__dict__[''.join(['e','n','v','i','r','o','n'])]
|
||||
"""
|
||||
f = tmp_path / "exfil.py"
|
||||
f.write_text(payload)
|
||||
findings = ast_scan_file(f)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
assert "ast_dynamic_import" in pids
|
||||
assert "ast_dict_access" in pids
|
||||
assert "ast_importlib_import" in pids
|
||||
|
||||
def test_non_python_files_return_empty(self, tmp_path):
|
||||
"""AST scan returns empty list for non-.py files."""
|
||||
f = tmp_path / "script.sh"
|
||||
f.write_text("import importlib\nimportlib.import_module('os')\n")
|
||||
findings = ast_scan_file(f)
|
||||
assert findings == []
|
||||
|
||||
def test_scan_handles_recursion_error_gracefully(self, tmp_path):
|
||||
"""Deeply-nested expressions that blow the visitor recursion limit
|
||||
must not crash the scan — return whatever findings were collected so far."""
|
||||
src = "a" + ".x" * 5000 + "\n"
|
||||
f = tmp_path / "deep.py"
|
||||
f.write_text(src)
|
||||
|
||||
original_limit = sys.getrecursionlimit()
|
||||
sys.setrecursionlimit(200)
|
||||
try:
|
||||
findings = ast_scan_file(f)
|
||||
finally:
|
||||
sys.setrecursionlimit(original_limit)
|
||||
|
||||
assert isinstance(findings, list)
|
||||
def test_bypass_payload_detected(tmp_path):
    """The bypass shape from #7072 must produce all three pattern ids."""
    # The payload imports importlib, builds the module name at runtime and
    # then indexes __dict__ with a computed key.
    payload_lines = (
        "import importlib\n",
        "parts = ['o', 's']\n",
        "m = importlib.import_module(''.join(parts))\n",
        "e = m.__dict__[''.join(['e','n','v'])]\n",
    )
    target = tmp_path / "exfil.py"
    target.write_text("".join(payload_lines))

    found = _pids(ast_scan_path(target))

    for expected in ("dynamic_import", "importlib_import", "dict_access"):
        assert expected in found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Directory scanner tests
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_syntax_error_does_not_crash(tmp_path):
    """A .py file that does not parse yields an empty result, not an exception."""
    broken = tmp_path / "bad.py"
    # Unterminated parameter list: invalid Python on purpose.
    broken.write_text("def broken(\n")

    findings = ast_scan_path(broken)

    assert findings == []
|
||||
|
||||
|
||||
class TestAstScanSkill:
|
||||
"""Directory-level scanning via ast_scan_skill()."""
|
||||
|
||||
def test_scans_all_py_files_in_tree(self, tmp_path):
|
||||
"""All .py files in a skill directory are scanned recursively."""
|
||||
skill = tmp_path / "my-skill"
|
||||
skill.mkdir()
|
||||
sub = skill / "subpkg"
|
||||
sub.mkdir()
|
||||
|
||||
(skill / "main.py").write_text("import importlib\n")
|
||||
(sub / "utils.py").write_text("import importlib.util\n")
|
||||
|
||||
findings = ast_scan_skill(skill)
|
||||
pids = [f.pattern_id for f in findings]
|
||||
# Both files should have importlib findings
|
||||
assert pids.count("ast_importlib_import") == 2
|
||||
|
||||
def test_skips_ignored_dirs(self, tmp_path):
|
||||
"""__pycache__, venv, .venv, and node_modules directories are skipped."""
|
||||
skill = tmp_path / "my-skill"
|
||||
skill.mkdir()
|
||||
for dirname in ("__pycache__", "venv", ".venv", "node_modules"):
|
||||
ignored = skill / dirname
|
||||
ignored.mkdir()
|
||||
(ignored / "cached.py").write_text("import importlib\n")
|
||||
|
||||
findings = ast_scan_skill(skill)
|
||||
assert findings == []
|
||||
|
||||
def test_skips_non_existent_dir(self, tmp_path):
|
||||
"""Non-existent directory returns empty list."""
|
||||
findings = ast_scan_skill(Path("/nonexistent/skill/path"))
|
||||
assert findings == []
|
||||
|
||||
def test_non_dir_path(self, tmp_path):
|
||||
"""A file path (not a directory) returns empty list."""
|
||||
f = tmp_path / "not_a_dir.py"
|
||||
f.write_text("import importlib\n")
|
||||
findings = ast_scan_skill(f)
|
||||
assert findings == []
|
||||
def test_recursion_error_does_not_crash(tmp_path):
    """Scanning a 5000-deep attribute chain under a low recursion limit returns a list."""
    deep = tmp_path / "deep.py"
    source = "a" + ".x" * 5000 + "\n"
    deep.write_text(source)

    saved_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(200)
    try:
        findings = ast_scan_path(deep)
    finally:
        # Always restore the interpreter-wide limit so later tests are unaffected.
        sys.setrecursionlimit(saved_limit)

    # Only the return type is pinned; the contents are not part of the contract here.
    assert isinstance(findings, list)
|
||||
|
||||
|
||||
class TestAstScanBundleFiles:
|
||||
"""In-memory bundle scanning for pre-install inspect diagnostics."""
|
||||
|
||||
def test_scans_python_files_from_bundle(self):
|
||||
"""Python files in source adapter bundle mappings are scanned."""
|
||||
findings = ast_scan_bundle_files({
|
||||
"SKILL.md": "---\nname: test\n---\n",
|
||||
"scripts/run.py": "import importlib\n",
|
||||
"references/readme.md": "import importlib\n",
|
||||
})
|
||||
assert [f.pattern_id for f in findings] == ["ast_importlib_import"]
|
||||
assert findings[0].file == "scripts/run.py"
|
||||
|
||||
def test_decodes_bytes_bundle_content(self):
|
||||
"""Bundle file content may be bytes; decode with replacement."""
|
||||
findings = ast_scan_bundle_files({
|
||||
"scripts/run.py": b"from importlib.util import find_spec\n",
|
||||
})
|
||||
assert [f.pattern_id for f in findings] == ["ast_importlib_import"]
|
||||
|
||||
def test_skips_bundle_cache_dirs(self):
|
||||
"""Virtualenv/cache paths in a bundle are ignored."""
|
||||
findings = ast_scan_bundle_files({
|
||||
"venv/lib/run.py": "import importlib\n",
|
||||
"__pycache__/cached.py": "import importlib\n",
|
||||
})
|
||||
assert findings == []
|
||||
def test_importer_lookalike_not_flagged(tmp_path):
    """A module merely named like importlib (`importer`) must yield no findings."""
    lookalike = tmp_path / "ok.py"
    # Both the plain-import and from-import spellings are exercised.
    lookalike.write_text("import importer\nfrom importer import x\n")

    found = _pids(ast_scan_path(lookalike))

    assert found == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report formatting tests
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_literal_dunder_import_not_flagged(tmp_path):
    """`__import__` with a string-literal argument is not reported as computed."""
    literal_import = tmp_path / "ok.py"
    literal_import.write_text("m = __import__('os')\n")

    found = _pids(ast_scan_path(literal_import))

    assert "dynamic_import_computed" not in found
|
||||
|
||||
|
||||
class TestFormatAstReport:
|
||||
"""Rich report formatting."""
|
||||
def test_non_python_file_returns_empty(tmp_path):
    """A file without a .py suffix is ignored even if its text looks like Python."""
    shell_script = tmp_path / "script.sh"
    shell_script.write_text("import importlib\n")

    findings = ast_scan_path(shell_script)

    assert findings == []
|
||||
|
||||
def test_empty_findings(self):
|
||||
"""Empty findings list produces a clean 'nothing found' message."""
|
||||
report = format_ast_report([])
|
||||
assert "No AST-level patterns detected" in report
|
||||
|
||||
def test_empty_with_skill_name(self):
|
||||
"""Report with skill name but no findings."""
|
||||
report = format_ast_report([], skill_name="test-skill")
|
||||
assert "test-skill" in report
|
||||
assert "No AST-level patterns detected" in report
|
||||
def test_directory_scans_recursively_and_skips_cache_dirs(tmp_path):
    """Directory scans descend into subpackages but ignore cache/venv directories."""
    skill_root = tmp_path / "s"
    skill_root.mkdir()
    (skill_root / "main.py").write_text("import importlib\n")

    nested = skill_root / "sub"
    nested.mkdir()
    (nested / "u.py").write_text("from importlib.util import find_spec\n")

    # Each of these directories holds a file that would be flagged if scanned.
    for skipped_name in ("__pycache__", ".venv", "venv", "node_modules"):
        skipped_dir = skill_root / skipped_name
        skipped_dir.mkdir()
        (skipped_dir / "junk.py").write_text("import importlib\n")

    found = _pids(ast_scan_path(skill_root))

    # Exactly the two real files contribute: main.py and sub/u.py.
    assert found.count("importlib_import") == 2
|
||||
|
||||
def test_findings_grouped_by_file(self):
|
||||
"""Findings from the same file appear together."""
|
||||
findings = [
|
||||
AstFinding(
|
||||
pattern_id="ast_importlib_import",
|
||||
severity="medium",
|
||||
category="obfuscation",
|
||||
file="main.py",
|
||||
line=1,
|
||||
match="import importlib",
|
||||
description="importlib imported",
|
||||
),
|
||||
AstFinding(
|
||||
pattern_id="ast_dynamic_import",
|
||||
severity="high",
|
||||
category="obfuscation",
|
||||
file="main.py",
|
||||
line=3,
|
||||
match="importlib.import_module()",
|
||||
description="dynamic import via importlib",
|
||||
),
|
||||
]
|
||||
report = format_ast_report(findings)
|
||||
assert "main.py" in report
|
||||
assert "importlib imported" in report
|
||||
assert "dynamic import via importlib" in report
|
||||
assert "2 finding" in report # summary line
|
||||
assert "Note: AST findings are diagnostic hints" in report
|
||||
|
||||
def test_severity_summary(self):
|
||||
"""Report header includes severity counts."""
|
||||
findings = [
|
||||
AstFinding("id1", "high", "x", "f.py", 1, "m", "desc"),
|
||||
AstFinding("id2", "high", "x", "f.py", 2, "m", "desc"),
|
||||
AstFinding("id3", "medium", "x", "f.py", 3, "m", "desc"),
|
||||
]
|
||||
report = format_ast_report(findings)
|
||||
assert "2 high" in report
|
||||
assert "1 medium" in report
|
||||
def test_missing_path_returns_empty(tmp_path):
    """A path that does not exist produces an empty result."""
    absent = tmp_path / "does_not_exist"

    findings = ast_scan_path(absent)

    assert findings == []
|
||||
|
||||
|
||||
def test_dynamic_getattr_and_dict_access_detected(tmp_path):
    """getattr with a variable name and __dict__ with a variable key are both reported."""
    sample = tmp_path / "g.py"
    sample.write_text("name = 'x'\nv = getattr(o, name)\nv = o.__dict__[name]\n")

    found = _pids(ast_scan_path(sample))

    for expected in ("dynamic_getattr", "dict_access"):
        assert expected in found
|
||||
|
||||
|
||||
def test_format_report_empty():
    """With no findings, the report contains the 'No dynamic' notice."""
    report = format_ast_report([])

    assert "No dynamic" in report
|
||||
|
||||
|
||||
def test_format_report_with_findings():
    """A populated report names the skill, the file, each line marker and the disclaimer."""
    sample_findings = [
        ("a.py", 1, "importlib_import", "import importlib — ..."),
        ("a.py", 3, "dynamic_import", "importlib.import_module() — ..."),
    ]

    out = format_ast_report(sample_findings, skill_name="test")

    # Checked one token at a time so a failure names the missing piece.
    for token in ("test", "a.py", "L1", "L3"):
        assert token in out
    assert "diagnostic hints" in out
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue