mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-04 02:21:47 +00:00
security(approval): close 4 pattern gaps found by source-grounded audit
Four gaps in DANGEROUS_PATTERNS found by running 10 targeted tests that
each mapped to a specific pattern in approval.py and checked whether the
documented defense actually held.
1. **Heredoc script injection** — `python3 << 'EOF'` bypasses the
existing `-e`/`-c` flag pattern. Adds pattern for interpreter + `<<`
covering python{2,3}, perl, ruby, node.
2. **PID expansion self-termination** — `kill -9 $(pgrep hermes)` is
opaque to the existing `pkill|killall` + name pattern because command
substitution is not expanded at detection time. Adds structural
patterns matching `kill` + `$(pgrep` and backtick variants.
3. **Git destructive operations** — `git reset --hard`, `push --force`,
`push -f`, `clean -f*`, and `branch -D` were entirely absent.
Note: `branch -d` also triggers because IGNORECASE is global —
acceptable since -d is still a delete, just a safe one, and the
prompt is only a confirmation, not a hard block.
4. **chmod +x then execute** — two-step social engineering where a
script containing dangerous commands is first written to disk (not
checked by write_file), then made executable and run as `./script`.
Pattern catches `chmod +x ... [;&|]+ ./` combos. Does not solve the
deeper architectural issue (write_file not checking content) — that
is called out in the PR description as a known limitation.
Tests: 23 new cases across 4 test classes, all in test_approval.py:
- TestHeredocScriptExecution (7 cases, incl. regressions for -c)
- TestPgrepKillExpansion (5 cases, incl. safe kill PID negative)
- TestGitDestructiveOps (8 cases, incl. safe git status/push negatives)
- TestChmodExecuteCombo (3 cases, incl. safe chmod-only negative)
Full suite: 146 passed, 0 failed.
This commit is contained in:
parent
5a1cce53e4
commit
aedf6c7964
2 changed files with 189 additions and 0 deletions
|
|
@ -649,3 +649,172 @@ class TestNormalizationBypass:
|
|||
assert dangerous is False
|
||||
|
||||
|
||||
class TestHeredocScriptExecution:
|
||||
"""Script execution via heredoc bypasses the -e/-c flag patterns.
|
||||
|
||||
`python3 << 'EOF'` feeds arbitrary code through stdin without any
|
||||
flag that the original patterns check for. See security audit Test 3.
|
||||
"""
|
||||
|
||||
def test_python3_heredoc_detected(self):
|
||||
# The heredoc body also contains `rm -rf /` which fires the
|
||||
# "delete in root path" pattern first (patterns are ordered).
|
||||
# The heredoc pattern also matches — either detection is correct.
|
||||
cmd = "python3 << 'EOF'\nimport os; os.system('rm -rf /')\nEOF"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_python_heredoc_detected(self):
|
||||
cmd = 'python << "PYEOF"\nprint("pwned")\nPYEOF'
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_perl_heredoc_detected(self):
|
||||
cmd = "perl <<'END'\nsystem('whoami');\nEND"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_ruby_heredoc_detected(self):
|
||||
cmd = "ruby <<RUBY\n`rm -rf /`\nRUBY"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_node_heredoc_detected(self):
|
||||
cmd = "node << 'JS'\nrequire('child_process').execSync('whoami')\nJS"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_python3_dash_c_still_detected(self):
|
||||
"""Existing -c pattern must not regress."""
|
||||
cmd = "python3 -c 'import os; os.system(\"rm -rf /\")'"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_safe_python_not_flagged(self):
|
||||
"""Plain 'python3 script.py' without heredoc or -c must stay safe."""
|
||||
cmd = "python3 my_script.py"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
class TestPgrepKillExpansion:
|
||||
"""kill -9 $(pgrep hermes) bypasses the pkill/killall name-matching
|
||||
pattern because the command substitution is opaque to regex.
|
||||
|
||||
See security audit Test 7.
|
||||
"""
|
||||
|
||||
def test_kill_dollar_pgrep_detected(self):
|
||||
cmd = 'kill -9 $(pgrep -f "hermes.*gateway")'
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
assert "pgrep" in desc.lower()
|
||||
|
||||
def test_kill_backtick_pgrep_detected(self):
|
||||
cmd = "kill -9 `pgrep hermes`"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_kill_dollar_pgrep_no_flags(self):
|
||||
cmd = "kill $(pgrep gateway)"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_pkill_hermes_still_detected(self):
|
||||
"""Existing pkill pattern must not regress."""
|
||||
cmd = "pkill -9 hermes"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_safe_kill_pid_not_flagged(self):
|
||||
"""A plain 'kill 12345' (literal PID, no expansion) must stay safe."""
|
||||
cmd = "kill 12345"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
class TestGitDestructiveOps:
|
||||
"""git reset --hard, push --force, clean -f, branch -D can destroy
|
||||
work and rewrite shared history. Not covered by rm/chmod patterns.
|
||||
|
||||
See security audit Test 6.
|
||||
"""
|
||||
|
||||
def test_git_reset_hard_detected(self):
|
||||
cmd = "git reset --hard HEAD~3"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
assert "reset" in desc.lower() or "hard" in desc.lower()
|
||||
|
||||
def test_git_push_force_detected(self):
|
||||
cmd = "git push --force origin main"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
assert "force" in desc.lower()
|
||||
|
||||
def test_git_push_dash_f_detected(self):
|
||||
cmd = "git push -f origin main"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_git_clean_force_detected(self):
|
||||
cmd = "git clean -fd"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
assert "clean" in desc.lower()
|
||||
|
||||
def test_git_branch_force_delete_detected(self):
|
||||
cmd = "git branch -D feature-branch"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_safe_git_status_not_flagged(self):
|
||||
cmd = "git status"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
def test_safe_git_push_not_flagged(self):
|
||||
"""Normal push without --force must not be flagged."""
|
||||
cmd = "git push origin main"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
def test_git_branch_lowercase_d_also_flagged(self):
|
||||
"""git branch -d triggers approval too — IGNORECASE is global.
|
||||
|
||||
This is intentional: -d is safer than -D but an approval prompt
|
||||
for branch deletion is reasonable. The user can still approve.
|
||||
"""
|
||||
cmd = "git branch -d feature-branch"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
|
||||
class TestChmodExecuteCombo:
|
||||
"""chmod +x && ./ is the two-step social engineering pattern where a
|
||||
script is first made executable then immediately run. The script
|
||||
content may contain dangerous commands invisible to pattern matching.
|
||||
|
||||
See security audit Test 4.
|
||||
"""
|
||||
|
||||
def test_chmod_and_execute_detected(self):
|
||||
cmd = "chmod +x /tmp/cleanup.sh && ./cleanup.sh"
|
||||
dangerous, _, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
assert "chmod" in desc.lower() or "execution" in desc.lower()
|
||||
|
||||
def test_chmod_semicolon_execute_detected(self):
|
||||
cmd = "chmod +x script.sh; ./script.sh"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
# Semicolon variant — pattern uses && but full-string match
|
||||
# on chmod +x should still trigger even without the && ./
|
||||
assert dangerous is True
|
||||
|
||||
def test_safe_chmod_without_execute_not_flagged(self):
|
||||
"""chmod +x alone without immediate execution must not be flagged."""
|
||||
cmd = "chmod +x script.sh"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue