""" Regex-based security pattern definitions for the security-guidance plugin. Pure data + one pure helper. No env-var reads, no I/O — kept side-effect-free so it can be imported in isolation. Forked verbatim from Anthropic's claude-plugins-official repository (plugins/security-guidance/hooks/patterns.py) under the Apache License 2.0: https://github.com/anthropics/claude-plugins-official Copyright (c) Anthropic, PBC. and the security-guidance contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Modifications by NousResearch for the Hermes Agent plugin port: - none to the pattern data itself; this file is byte-for-byte the upstream patterns.py at commit 0bde168 (2026-05-26). Hermes-side wiring lives in __init__.py. """ from enum import IntEnum _JS_EXTS = (".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".mts", ".cts", ".vue", ".svelte") _PY_EXTS = (".py", ".pyi", ".ipynb") _DOC_EXTS = (".md", ".mdx", ".txt", ".rst", ".json", ".yaml", ".yml") _UNSAFE_DESERIALIZATION_REMINDER = """⚠️ Security Warning: Loading pickle data (or equivalents: cPickle, cloudpickle, dill, marshal, shelve, joblib, pandas.read_pickle, numpy with allow_pickle=True) from untrusted sources allows arbitrary code execution. For simple data, prefer JSON or msgspec. For typed objects, prefer a schema-validated deserializer (msgspec.Struct, pydantic, marshmallow) that constructs only declared types. 
If this is safe or is explicitly needed, briefly document that in a comment before continuing.""" _UNSAFE_YAML_LOAD_REMINDER = """⚠️ Security Warning: yaml.load() / yaml.unsafe_load() execute arbitrary Python via !!python/object tags. Use yaml.safe_load() if the file only contains simple data structures (dicts, lists, strings, numbers). If you need typed objects, parse with safe_load and validate the result against a schema (pydantic, msgspec, marshmallow) — never use a custom Loader that constructs arbitrary types.""" _UNSAFE_TORCH_LOAD_REMINDER = """⚠️ Security Warning: torch.load() defaults to weights_only=False, which unpickles arbitrary Python objects and allows arbitrary code execution. If the file only contains tensors and simple data structures, pass weights_only=True (or set TORCH_FORCE_WEIGHTS_ONLY_LOAD=1).""" # Security patterns configuration SECURITY_PATTERNS = [ { "ruleName": "github_actions_workflow", "path_check": lambda path: ".github/workflows/" in path and (path.endswith(".yml") or path.endswith(".yaml")), "reminder": """⚠️ Security Warning: You are editing a GitHub Actions workflow file. Be aware of these security risks: 1. **Command Injection**: Never use untrusted input (like issue titles, PR descriptions, commit messages) directly in run: commands without proper escaping 2. **Use environment variables**: Instead of ${{ github.event.issue.title }}, use env: with proper quoting 3. 
**Review the guide**: https://github.blog/security/vulnerability-research/how-to-catch-github-actions-workflow-injections-before-attackers-do/ Example of UNSAFE pattern to avoid: run: echo "${{ github.event.issue.title }}" Example of SAFE pattern: env: TITLE: ${{ github.event.issue.title }} run: echo "$TITLE" Other risky inputs to be careful with: - github.event.issue.body - github.event.pull_request.title - github.event.pull_request.body - github.event.comment.body - github.event.review.body - github.event.review_comment.body - github.event.pages.*.page_name - github.event.commits.*.message - github.event.head_commit.message - github.event.head_commit.author.email - github.event.head_commit.author.name - github.event.commits.*.author.email - github.event.commits.*.author.name - github.event.pull_request.head.ref - github.event.pull_request.head.label - github.event.pull_request.head.repo.default_branch - github.event.client_payload.* (repository_dispatch events — attacker can set any field) 4. **Ref injection**: Never use untrusted input in `ref:` parameters of `actions/checkout`. For `client_payload.pr_number`, validate it matches `^[0-9]+$` before using in `ref: refs/pull/${{ ... }}/head` - github.head_ref""", }, { "ruleName": "child_process_exec", # Gate to JS/TS files — bare `exec(` otherwise fires on Python's # exec() and on prose/docstrings mentioning exec. "path_filter": lambda p: p.endswith(_JS_EXTS), "substrings": ["child_process.exec", "execSync("], "regex": r"(? o[k], root); for computation use a safe expression parser. NEVER interpolate untrusted strings into new Function() bodies.", }, { "ruleName": "eval_injection", # Lookbehind excludes `.` so method calls like PyTorch model.eval(), # redis.eval(), spec.eval() don't match. Skip doc/prose files. 
"path_filter": lambda p: not p.endswith(_DOC_EXTS), "regex": r"(?]{0,400}integrity\s*=)" r"[^>]{0,200}src\s*=\s*[\x22\x27](?:https?:)?//" r"[^\x22\x27]{1,300}[\x22\x27]" r"[^>]{0,100}>" ), "reminder": '⚠️ Security Warning: Add integrity="sha384-..." crossorigin="anonymous" to external script tags. Loading scripts without Subresource Integrity exposes you to CDN compromise.', }, { "ruleName": "torch_unsafe_load", # Suppressed by weights_only=True on the same line (within 200 chars). weights_only=False # still triggers. Multi-line calls false-positive — same known limitation as unsafe_yaml_load. "regex": r"(?:\btorch\.load|\.torch_load)\s*\((?![^)\n]{0,200}weights_only\s*=\s*True)", "reminder": _UNSAFE_TORCH_LOAD_REMINDER, }, { "ruleName": "yaml_unsafe_load_variants", # yaml.unsafe_load (stdlib alias) plus unsafe wrapper method names seen in the wild. # Bare yaml.load() is unsafe_yaml_load's job (RuleId 12). "regex": r"(?:\byaml\.unsafe_load|\.yaml_unsafe_load)\s*\(", "reminder": _UNSAFE_YAML_LOAD_REMINDER, }, { "ruleName": "pickle_wrapper_load", # Library APIs that unpickle without saying "pickle". numpy.load only triggers # when allow_pickle=True is explicit (defaults to False since numpy 1.16.3). "regex": r"\bjoblib\.load\s*\(|\b(?:pd|pandas)\.read_pickle\s*\(|\.cloudpickle_load\s*\(|\b(?:np|numpy)\.load\s*\([^)\n]{0,200}allow_pickle\s*=\s*True", "reminder": _UNSAFE_DESERIALIZATION_REMINDER, }, ] class RuleId(IntEnum): """ Stable numeric IDs for SECURITY_PATTERNS rules, emitted via the PostToolUse metrics field so telemetry can attribute pattern-warning events to specific checks. The metrics schema only allows bool|number values (no strings), so rule names can't be sent directly. Values are frozen: do not renumber existing entries. Append new ones. 
""" GITHUB_ACTIONS_WORKFLOW = 1 CHILD_PROCESS_EXEC = 2 NEW_FUNCTION_INJECTION = 3 EVAL_INJECTION = 4 REACT_DANGEROUSLY_SET_HTML = 5 DOCUMENT_WRITE_XSS = 6 INNERHTML_XSS = 7 PICKLE_DESERIALIZATION = 8 OS_SYSTEM_INJECTION = 9 PYTHON_SUBPROCESS_SHELL = 10 GO_EXEC_SHELL_INJECTION = 11 UNSAFE_YAML_LOAD = 12 NODE_CREATECIPHER_NO_IV = 13 AES_ECB_MODE = 14 TLS_VERIFICATION_DISABLED = 15 MARSHAL_LOADS = 16 SHELVE_OPEN = 17 XML_UNSAFE_PARSE = 18 PICKLE_VARIANTS_LOAD = 19 OUTERHTML_XSS = 20 INSERTADJACENTHTML_XSS = 21 SCRIPT_SRC_WITHOUT_SRI = 22 TORCH_UNSAFE_LOAD = 23 YAML_UNSAFE_LOAD_VARIANTS = 24 PICKLE_WRAPPER_LOAD = 25 _RULE_NAME_TO_ID = { "github_actions_workflow": RuleId.GITHUB_ACTIONS_WORKFLOW, "child_process_exec": RuleId.CHILD_PROCESS_EXEC, "new_function_injection": RuleId.NEW_FUNCTION_INJECTION, "eval_injection": RuleId.EVAL_INJECTION, "react_dangerously_set_html": RuleId.REACT_DANGEROUSLY_SET_HTML, "document_write_xss": RuleId.DOCUMENT_WRITE_XSS, "innerHTML_xss": RuleId.INNERHTML_XSS, "pickle_deserialization": RuleId.PICKLE_DESERIALIZATION, "os_system_injection": RuleId.OS_SYSTEM_INJECTION, "python_subprocess_shell": RuleId.PYTHON_SUBPROCESS_SHELL, "go_exec_shell_injection": RuleId.GO_EXEC_SHELL_INJECTION, "unsafe_yaml_load": RuleId.UNSAFE_YAML_LOAD, "node_createcipher_no_iv": RuleId.NODE_CREATECIPHER_NO_IV, "aes_ecb_mode": RuleId.AES_ECB_MODE, "tls_verification_disabled": RuleId.TLS_VERIFICATION_DISABLED, "marshal_loads": RuleId.MARSHAL_LOADS, "shelve_open": RuleId.SHELVE_OPEN, "xml_unsafe_parse": RuleId.XML_UNSAFE_PARSE, "pickle_variants_load": RuleId.PICKLE_VARIANTS_LOAD, "outerHTML_xss": RuleId.OUTERHTML_XSS, "insertAdjacentHTML_xss": RuleId.INSERTADJACENTHTML_XSS, "script_src_without_sri": RuleId.SCRIPT_SRC_WITHOUT_SRI, "torch_unsafe_load": RuleId.TORCH_UNSAFE_LOAD, "yaml_unsafe_load_variants": RuleId.YAML_UNSAFE_LOAD_VARIANTS, "pickle_wrapper_load": RuleId.PICKLE_WRAPPER_LOAD, } # Fail loudly at import time if a pattern is added without a RuleId. 
# This fires in pytest on every PR, so desync is caught before merge.
# The failure message lists both directions of the mismatch: rule names that
# lack a RuleId mapping ("missing") and mappings with no pattern ("extra").
assert {p["ruleName"] for p in SECURITY_PATTERNS} == set(_RULE_NAME_TO_ID), (
    "RuleId enum out of sync with SECURITY_PATTERNS: "
    f"missing={set(p['ruleName'] for p in SECURITY_PATTERNS) - set(_RULE_NAME_TO_ID)}, "
    f"extra={set(_RULE_NAME_TO_ID) - set(p['ruleName'] for p in SECURITY_PATTERNS)}"
)


def rule_names_to_mask(rule_names):
    """Fold an iterable of rule names into a single integer bitmask.

    For every name that has an entry in ``_RULE_NAME_TO_ID``, bit N of the
    result is set, where N is that rule's numeric ID. Names without an entry
    (for example user-defined patterns, whose names start with "user:") are
    skipped and contribute nothing to the mask.

    Args:
        rule_names: Iterable of rule-name strings.

    Returns:
        An int with one bit set per recognised rule; 0 when nothing matched.
    """
    bits = 0
    for rule in rule_names:
        rule_id = _RULE_NAME_TO_ID.get(rule)
        if rule_id is None:
            # No static RuleId for this name; leave the mask untouched.
            continue
        bits |= 1 << rule_id
    return bits