mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Harden agent attack surface: scan writes to memory, skills, cron, and context files
The security scanner (skills_guard.py) was only wired into the hub install path. All other write paths to persistent state — skills created by the agent, memory entries, cron prompts, and context files — bypassed it entirely.

This closes those gaps:
- file_operations: deny-list blocks writes to ~/.ssh, ~/.aws, ~/.hermes/.env, etc.
- code_execution_tool: filter secret env vars from sandbox child process
- skill_manager_tool: wire scan_skill() into create/edit/patch/write_file with rollback
- skills_guard: add "agent-created" trust level (same policy as community)
- memory_tool: scan content for injection/exfil before system prompt injection
- prompt_builder: scan AGENTS.md, .cursorrules, SOUL.md for prompt injection
- cronjob_tools: scan cron prompts for critical threats before scheduling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0310170869
commit
95b6bd5df6
7 changed files with 278 additions and 8 deletions
|
|
@ -10,6 +10,7 @@ The prompt must contain ALL necessary information.
|
|||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
# Import from cron module (will be available when properly installed)
|
||||
|
|
@ -20,6 +21,41 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
from cron.jobs import create_job, get_job, list_jobs, remove_job
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cron prompt scanning — critical-severity patterns only, since cron prompts
|
||||
# run in fresh sessions with full tool access.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CRON_THREAT_PATTERNS = [
|
||||
(r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
|
||||
(r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
|
||||
(r'system\s+prompt\s+override', "sys_prompt_override"),
|
||||
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
|
||||
(r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
|
||||
(r'wget\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_wget"),
|
||||
(r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
|
||||
(r'authorized_keys', "ssh_backdoor"),
|
||||
(r'/etc/sudoers|visudo', "sudoers_mod"),
|
||||
(r'rm\s+-rf\s+/', "destructive_root_rm"),
|
||||
]
|
||||
|
||||
_CRON_INVISIBLE_CHARS = {
|
||||
'\u200b', '\u200c', '\u200d', '\u2060', '\ufeff',
|
||||
'\u202a', '\u202b', '\u202c', '\u202d', '\u202e',
|
||||
}
|
||||
|
||||
|
||||
def _scan_cron_prompt(prompt: str) -> str:
|
||||
"""Scan a cron prompt for critical threats. Returns error string if blocked, else empty."""
|
||||
for char in _CRON_INVISIBLE_CHARS:
|
||||
if char in prompt:
|
||||
return f"Blocked: prompt contains invisible unicode U+{ord(char):04X} (possible injection)."
|
||||
for pattern, pid in _CRON_THREAT_PATTERNS:
|
||||
if re.search(pattern, prompt, re.IGNORECASE):
|
||||
return f"Blocked: prompt matches threat pattern '{pid}'. Cron prompts must not contain injection or exfiltration payloads."
|
||||
return ""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tool: schedule_cronjob
|
||||
# =============================================================================
|
||||
|
|
@ -71,6 +107,11 @@ def schedule_cronjob(
|
|||
Returns:
|
||||
JSON with job_id, next_run time, and confirmation
|
||||
"""
|
||||
# Scan prompt for critical threats before scheduling
|
||||
scan_error = _scan_cron_prompt(prompt)
|
||||
if scan_error:
|
||||
return json.dumps({"success": False, "error": scan_error}, indent=2)
|
||||
|
||||
# Get origin info from environment if available
|
||||
origin = None
|
||||
origin_platform = os.getenv("HERMES_SESSION_PLATFORM")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue