mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-07 08:02:23 +00:00
refactor(skills): slim AST diagnostic to single entry point
Trim ~600 LOC off the original contribution while keeping the same operator-facing surface and detection coverage.

- Collapse three entry points (file / dir / bundle) into one ast_scan_path(path) that handles both files and directories.
- Drop AstFinding dataclass + severity field — replaced with plain (file, line, pattern_id, description) tuples. Severity ordering was display-only for a diagnostic that explicitly disclaims security verdicts, so the field added bookkeeping without earning its place.
- Replace Rich-markup formatter with plain text grouped by file.
- Drop the 'inspect --ast-deep' surface — same scanner, same output as 'audit --deep', single CLI entry is enough. Operators audit after install; pre-install inspection signal isn't worth the second surface.
- Trim test file to the cases that earn their place: bypass payload, syntax error survival, RecursionError survival, false-positive guard (importer lookalike), literal-arg false-positive guard, non-.py ignored, directory recursion + cache-dir skipping, missing-path, getattr/__dict__ detection, formatter empty + populated.

Net: tools/skills_ast_audit.py 353 -> 133 LOC, tests/tools/test_skills_ast_audit.py 299 -> 103 LOC, full diff +704/-12 -> +264/-6.

No change to tools/skills_guard.py — Skills Guard verdicts remain untouched per SECURITY.md §2.4.
This commit is contained in:
parent
7255050c99
commit
4254f7dd17
4 changed files with 175 additions and 609 deletions
|
|
@ -1,353 +1,133 @@
|
|||
"""
|
||||
AST-level deep audit for skill Python files — opt-in diagnostic, not a security gate.
|
||||
|
||||
This is a standalone diagnostic tool per SECURITY.md spirit: it helps operators
|
||||
inspect skill code for patterns that *could* enable dynamic import/access
|
||||
obfuscation, but it is NOT a security boundary. Every pattern flagged here has
|
||||
legitimate uses. Use your judgment.
|
||||
Per SECURITY.md §2.4, Skills Guard is in-process heuristics ("useful — not
|
||||
boundaries"). This module is a separate opt-in diagnostic that flags dynamic
|
||||
import / dynamic attribute access patterns operators may want to eyeball when
|
||||
reviewing third-party skill code. Every pattern flagged here has legitimate
|
||||
uses; findings are hints for human review, not verdicts.
|
||||
|
||||
Usage::
|
||||
|
||||
from tools.skills_ast_audit import ast_scan_skill, format_ast_report
|
||||
|
||||
findings = ast_scan_skill(Path("~/.hermes/skills/some-skill"))
|
||||
if findings:
|
||||
print(format_ast_report(findings))
|
||||
|
||||
CLI integration: ``hermes skills audit --deep``
|
||||
CLI: ``hermes skills audit --deep``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Mapping, List, Optional, Union
|
||||
from typing import List, Tuple
|
||||
|
||||
# (file, line, pattern_id, description)
|
||||
Finding = Tuple[str, int, str, str]
|
||||
|
||||
_IGNORED_DIRS = {"__pycache__", ".venv", "venv", "node_modules"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class AstFinding:
|
||||
"""A single finding from AST-level analysis."""
|
||||
|
||||
pattern_id: str
|
||||
"""Short identifier for deduplication and grouping (e.g. 'ast_importlib_import')."""
|
||||
|
||||
severity: str
|
||||
"""One of 'high', 'medium', 'low' — for display only, not a security claim."""
|
||||
|
||||
category: str
|
||||
"""Grouping label — currently always 'obfuscation'."""
|
||||
|
||||
file: str
|
||||
"""Relative path to the file containing the finding."""
|
||||
|
||||
line: int
|
||||
"""1-based line number."""
|
||||
|
||||
match: str
|
||||
"""The matched source construct (human-readable snippet)."""
|
||||
|
||||
description: str
|
||||
"""Why this pattern is worth reviewing."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scanner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ast_scan_python(content: str, rel_path: str) -> List[AstFinding]:
|
||||
"""Detect obfuscation via dynamic imports, attribute access, and string construction.
|
||||
|
||||
Hostile or pathological input (deeply-nested expressions, malformed source)
|
||||
must not crash the scan. Both ``ast.parse`` and the visitor traversal are
|
||||
guarded so parse/visit failures degrade gracefully to "no AST findings"
|
||||
rather than raising.
|
||||
"""
|
||||
def _scan_source(content: str, rel_path: str) -> List[Finding]:
|
||||
try:
|
||||
tree = ast.parse(content)
|
||||
except (SyntaxError, ValueError, RecursionError):
|
||||
return []
|
||||
|
||||
findings: List[AstFinding] = []
|
||||
findings: List[Finding] = []
|
||||
|
||||
class _Visitor(ast.NodeVisitor):
|
||||
class V(ast.NodeVisitor):
|
||||
def visit_Call(self, node):
|
||||
# Detect importlib.import_module(...)
|
||||
if (
|
||||
isinstance(node.func, ast.Attribute)
|
||||
and node.func.attr == "import_module"
|
||||
):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_dynamic_import",
|
||||
severity="high",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match="importlib.import_module()",
|
||||
description="dynamic import via importlib — can load arbitrary modules at runtime",
|
||||
)
|
||||
)
|
||||
# Detect __import__ with non-literal argument
|
||||
if isinstance(node.func, ast.Name) and node.func.id == "__import__":
|
||||
f = node.func
|
||||
# importlib.import_module(...)
|
||||
if isinstance(f, ast.Attribute) and f.attr == "import_module":
|
||||
findings.append((rel_path, node.lineno, "dynamic_import",
|
||||
"importlib.import_module() — loads arbitrary modules at runtime"))
|
||||
# __import__(<computed>)
|
||||
elif isinstance(f, ast.Name) and f.id == "__import__":
|
||||
if node.args and not isinstance(node.args[0], ast.Constant):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_dynamic_import_computed",
|
||||
severity="high",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match="__import__(<computed>)",
|
||||
description="__import__ with dynamically constructed module name",
|
||||
)
|
||||
)
|
||||
# Detect getattr with computed attribute name
|
||||
if isinstance(node.func, ast.Name) and node.func.id == "getattr":
|
||||
if len(node.args) >= 2 and not isinstance(
|
||||
node.args[1], ast.Constant
|
||||
):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_dynamic_getattr",
|
||||
severity="medium",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match="getattr(<obj>, <computed>)",
|
||||
description="getattr with dynamically constructed attribute name",
|
||||
)
|
||||
)
|
||||
findings.append((rel_path, node.lineno, "dynamic_import_computed",
|
||||
"__import__ with non-literal module name"))
|
||||
# getattr(obj, <computed>)
|
||||
elif isinstance(f, ast.Name) and f.id == "getattr":
|
||||
if len(node.args) >= 2 and not isinstance(node.args[1], ast.Constant):
|
||||
findings.append((rel_path, node.lineno, "dynamic_getattr",
|
||||
"getattr with non-literal attribute name"))
|
||||
self.generic_visit(node)
|
||||
|
||||
def visit_Subscript(self, node):
|
||||
# Detect obj.__dict__[<computed>]
|
||||
if (
|
||||
isinstance(node.value, ast.Attribute)
|
||||
and node.value.attr == "__dict__"
|
||||
):
|
||||
if not isinstance(node.slice, ast.Constant):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_dict_access",
|
||||
severity="high",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match="__dict__[<computed>]",
|
||||
description="dynamic attribute access via __dict__ with computed key",
|
||||
)
|
||||
)
|
||||
# obj.__dict__[<computed>]
|
||||
if (isinstance(node.value, ast.Attribute)
|
||||
and node.value.attr == "__dict__"
|
||||
and not isinstance(node.slice, ast.Constant)):
|
||||
findings.append((rel_path, node.lineno, "dict_access",
|
||||
"__dict__[<computed>] — dynamic attribute access"))
|
||||
self.generic_visit(node)
|
||||
|
||||
def visit_Import(self, node):
|
||||
# Flag importlib and any importlib.* submodule.
|
||||
for alias in node.names:
|
||||
if alias.name == "importlib" or alias.name.startswith(
|
||||
"importlib."
|
||||
):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_importlib_import",
|
||||
severity="medium",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match=f"import {alias.name}",
|
||||
description="importlib imported — enables dynamic module loading",
|
||||
)
|
||||
)
|
||||
for a in node.names:
|
||||
if a.name == "importlib" or a.name.startswith("importlib."):
|
||||
findings.append((rel_path, node.lineno, "importlib_import",
|
||||
f"import {a.name} — enables dynamic module loading"))
|
||||
self.generic_visit(node)
|
||||
|
||||
def visit_ImportFrom(self, node):
|
||||
module = node.module or ""
|
||||
if module == "importlib" or module.startswith("importlib."):
|
||||
findings.append(
|
||||
AstFinding(
|
||||
pattern_id="ast_importlib_import",
|
||||
severity="medium",
|
||||
category="obfuscation",
|
||||
file=rel_path,
|
||||
line=node.lineno,
|
||||
match=f"from {module} import ...",
|
||||
description="importlib imported — enables dynamic module loading",
|
||||
)
|
||||
)
|
||||
m = node.module or ""
|
||||
if m == "importlib" or m.startswith("importlib."):
|
||||
findings.append((rel_path, node.lineno, "importlib_import",
|
||||
f"from {m} import ... — enables dynamic module loading"))
|
||||
self.generic_visit(node)
|
||||
|
||||
try:
|
||||
_Visitor().visit(tree)
|
||||
V().visit(tree)
|
||||
except (RecursionError, ValueError, RuntimeError):
|
||||
# Visitor traversal can fail on hostile input even when ast.parse
|
||||
# succeeded (e.g. deeply-nested call/attribute chains). Return
|
||||
# whatever findings we collected before the failure.
|
||||
return findings
|
||||
# Hostile/pathological input: return what we collected so far.
|
||||
pass
|
||||
|
||||
return findings
|
||||
|
||||
|
||||
def ast_scan_file(file_path: Path, rel_path: Optional[str] = None) -> List[AstFinding]:
|
||||
"""Scan a single Python file and return AST-level findings.
|
||||
def ast_scan_path(path: Path) -> List[Finding]:
|
||||
"""Scan a single .py file or recursively scan all .py under a directory.
|
||||
|
||||
Args:
|
||||
file_path: Absolute path to the .py file.
|
||||
rel_path: Relative path for display (defaults to file_path.name).
|
||||
|
||||
Returns:
|
||||
List of :class:`AstFinding` — empty if the file isn't Python or scan yields nothing.
|
||||
Returns a list of (file, line, pattern_id, description) tuples. Empty for
|
||||
non-Python paths, missing paths, or paths with no matching patterns.
|
||||
"""
|
||||
if file_path.suffix.lower() != ".py":
|
||||
if path.is_file():
|
||||
if path.suffix.lower() != ".py":
|
||||
return []
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
return []
|
||||
return _scan_source(content, path.name)
|
||||
|
||||
if not path.is_dir():
|
||||
return []
|
||||
|
||||
if rel_path is None:
|
||||
rel_path = file_path.name
|
||||
|
||||
try:
|
||||
content = file_path.read_text(encoding="utf-8", errors="replace")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
return []
|
||||
|
||||
return _ast_scan_python(content, rel_path)
|
||||
|
||||
|
||||
def ast_scan_skill(skill_path: Path) -> List[AstFinding]:
|
||||
"""Recursively scan all Python files in a skill directory.
|
||||
|
||||
Args:
|
||||
skill_path: Path to the installed skill directory.
|
||||
|
||||
Returns:
|
||||
Combined list of :class:`AstFinding` across all .py files.
|
||||
"""
|
||||
if not skill_path.is_dir():
|
||||
return []
|
||||
|
||||
all_findings: List[AstFinding] = []
|
||||
|
||||
for py_file in sorted(skill_path.rglob("*.py")):
|
||||
# Skip __pycache__ and .venv/venv directories
|
||||
parts = set(py_file.parent.parts)
|
||||
if parts & {"__pycache__", ".venv", "venv", "node_modules"}:
|
||||
out: List[Finding] = []
|
||||
for py in sorted(path.rglob("*.py")):
|
||||
if set(py.parent.parts) & _IGNORED_DIRS:
|
||||
continue
|
||||
try:
|
||||
rel = py_file.relative_to(skill_path).as_posix()
|
||||
content = py.read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
continue
|
||||
try:
|
||||
rel = py.relative_to(path).as_posix()
|
||||
except ValueError:
|
||||
rel = py_file.name
|
||||
all_findings.extend(ast_scan_file(py_file, rel))
|
||||
|
||||
return all_findings
|
||||
rel = py.name
|
||||
out.extend(_scan_source(content, rel))
|
||||
return out
|
||||
|
||||
|
||||
def ast_scan_bundle_files(
|
||||
files: Mapping[str, Union[str, bytes]],
|
||||
) -> List[AstFinding]:
|
||||
"""Scan Python files from an in-memory skill bundle.
|
||||
|
||||
This powers ``hermes skills inspect --ast-deep`` so operators can review
|
||||
a skill before installing it. The input is the bundle's filename -> content
|
||||
mapping, as returned by the skills hub source adapters.
|
||||
"""
|
||||
all_findings: List[AstFinding] = []
|
||||
|
||||
for rel_path, content in sorted(files.items()):
|
||||
path = Path(rel_path)
|
||||
if path.suffix.lower() != ".py":
|
||||
continue
|
||||
if set(path.parts) & {"__pycache__", ".venv", "venv", "node_modules"}:
|
||||
continue
|
||||
if isinstance(content, bytes):
|
||||
text = content.decode("utf-8", errors="replace")
|
||||
else:
|
||||
text = str(content)
|
||||
all_findings.extend(_ast_scan_python(text, rel_path))
|
||||
|
||||
return all_findings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rich formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def format_ast_report(
|
||||
findings: List[AstFinding],
|
||||
skill_name: str = "",
|
||||
) -> str:
|
||||
"""Format AST findings as a Rich-markup string.
|
||||
|
||||
Args:
|
||||
findings: List of findings from :func:`ast_scan_skill`.
|
||||
skill_name: Optional skill name for the report header.
|
||||
|
||||
Returns:
|
||||
Rich-markup string suitable for ``console.print()``.
|
||||
"""
|
||||
def format_ast_report(findings: List[Finding], skill_name: str = "") -> str:
|
||||
"""Plain-text report (Rich-markup-free) grouped by file."""
|
||||
header = f"AST deep scan: {skill_name}" if skill_name else "AST deep scan"
|
||||
if not findings:
|
||||
header = (
|
||||
f"[bold]AST Deep Scan: {skill_name}[/]"
|
||||
if skill_name
|
||||
else "[bold]AST Deep Scan[/]"
|
||||
)
|
||||
return f"{header}\n[dim green]No AST-level patterns detected.[/]"
|
||||
return f"{header}\n No dynamic import/access patterns detected."
|
||||
|
||||
lines: List[str] = []
|
||||
severity_order = {"high": 0, "medium": 1, "low": 2}
|
||||
findings_sorted = sorted(
|
||||
findings,
|
||||
key=lambda f: (
|
||||
severity_order.get(f.severity, 99),
|
||||
f.file,
|
||||
f.line,
|
||||
),
|
||||
)
|
||||
|
||||
if skill_name:
|
||||
lines.append(f"[bold]AST Deep Scan: {skill_name}[/]")
|
||||
else:
|
||||
lines.append("[bold]AST Deep Scan[/]")
|
||||
|
||||
total = len(findings_sorted)
|
||||
high_count = sum(1 for f in findings_sorted if f.severity == "high")
|
||||
med_count = sum(1 for f in findings_sorted if f.severity == "medium")
|
||||
low_count = sum(1 for f in findings_sorted if f.severity == "low")
|
||||
|
||||
summary_parts = []
|
||||
if high_count:
|
||||
summary_parts.append(f"[bold red]{high_count} high[/]")
|
||||
if med_count:
|
||||
summary_parts.append(f"[yellow]{med_count} medium[/]")
|
||||
if low_count:
|
||||
summary_parts.append(f"[dim]{low_count} low[/]")
|
||||
lines.append(
|
||||
f"[dim]{total} finding(s)[/] — "
|
||||
+ ", ".join(summary_parts)
|
||||
if summary_parts
|
||||
else f"[dim]{total} finding(s)[/]"
|
||||
)
|
||||
lines = [header, f" {len(findings)} finding(s):"]
|
||||
current = None
|
||||
for f, line, pid, desc in sorted(findings):
|
||||
if f != current:
|
||||
current = f
|
||||
lines.append(f" {f}")
|
||||
lines.append(f" L{line} {pid} — {desc}")
|
||||
lines.append("")
|
||||
|
||||
current_file = None
|
||||
for f in findings_sorted:
|
||||
if f.file != current_file:
|
||||
current_file = f.file
|
||||
lines.append(f" [bold cyan]{f.file}[/]")
|
||||
sev_color = {"high": "bold red", "medium": "yellow", "low": "dim"}.get(
|
||||
f.severity, "dim"
|
||||
)
|
||||
lines.append(
|
||||
f" L{f.line:>4} [{sev_color}]{f.severity:6}[/] {f.description}"
|
||||
)
|
||||
lines.append(f" [dim]{f.match}[/]")
|
||||
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"[dim]Note: AST findings are diagnostic hints, not security verdicts. "
|
||||
"Review each pattern in context.[/]"
|
||||
)
|
||||
|
||||
lines.append(" Note: diagnostic hints for human review, not security verdicts.")
|
||||
return "\n".join(lines)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue