From 4cb3eb03c750c902919906733b79220588ca5f16 Mon Sep 17 00:00:00 2001 From: Rodrigo Date: Mon, 25 May 2026 07:35:33 -0300 Subject: [PATCH] fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern (#23835) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern - BUG-009 (CRITICAL): freeze HERMES_YOLO_MODE at module import via _YOLO_MODE_FROZEN; prevents skills/prompt-injection from calling os.environ["HERMES_YOLO_MODE"]="true" at runtime to bypass all checks - BUG-002 (HIGH): replace substring "APPROVE" in answer with exact answer == "APPROVE" in _smart_approve; prompt already requests exactly one word, substring match was exploitable via verbose LLM responses - BUG-001 (MEDIUM): add logger.warning for every dangerous command that auto-approves in non-interactive non-gateway context; makes silent approvals visible in audit logs without breaking script behavior - BUG-008 (LOW): expand curl/wget pipe pattern to cover | /bin/bash and | bash -c variants, not just | sh / | bash Co-Authored-By: Claude Sonnet 4.6 * fix(approval): add missing is_truthy_value import + fix yolo test patches _YOLO_MODE_FROZEN uses is_truthy_value() from utils — import was missing. Tests that set HERMES_YOLO_MODE via monkeypatch.setenv() no longer work because the value is frozen at import time; update them to patch the module-level flag directly via monkeypatch.setattr(). Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- tests/tools/test_yolo_mode.py | 10 +++++----- tools/approval.py | 20 +++++++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/tools/test_yolo_mode.py b/tests/tools/test_yolo_mode.py index 29a68f07ae0..ebd3c8ddced 100644 --- a/tests/tools/test_yolo_mode.py +++ b/tests/tools/test_yolo_mode.py @@ -55,8 +55,8 @@ class TestYoloMode: assert not result["approved"] def test_dangerous_command_approved_in_yolo_mode(self, monkeypatch): - """With HERMES_YOLO_MODE, dangerous (non-hardline) commands are auto-approved.""" - monkeypatch.setenv("HERMES_YOLO_MODE", "1") + """With HERMES_YOLO_MODE, dangerous commands are auto-approved.""" + monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True) monkeypatch.setenv("HERMES_INTERACTIVE", "1") monkeypatch.setenv("HERMES_SESSION_KEY", "test-session") @@ -68,8 +68,8 @@ class TestYoloMode: assert result["message"] is None def test_yolo_mode_works_for_all_patterns(self, monkeypatch): - """Yolo mode bypasses dangerous patterns (except the hardline floor).""" - monkeypatch.setenv("HERMES_YOLO_MODE", "1") + """Yolo mode bypasses all dangerous patterns, not just some.""" + monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True) monkeypatch.setenv("HERMES_INTERACTIVE", "1") # Dangerous but recoverable — yolo should bypass. @@ -90,7 +90,7 @@ class TestYoloMode: def test_combined_guard_bypasses_yolo_mode(self, monkeypatch): """The new combined guard should preserve yolo bypass semantics.""" - monkeypatch.setenv("HERMES_YOLO_MODE", "1") + monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True) monkeypatch.setenv("HERMES_INTERACTIVE", "1") called = {"value": False} diff --git a/tools/approval.py b/tools/approval.py index 399b9d6c2d2..18b085e4786 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -23,6 +23,11 @@ from utils import env_var_enabled, is_truthy_value logger = logging.getLogger(__name__) +# Freeze YOLO mode at module import time. Reading os.environ on every call +# would allow any skill running inside the process to set this variable and +# instantly bypass all approval checks — a prompt-injection escalation path. +_YOLO_MODE_FROZEN: bool = is_truthy_value(os.getenv("HERMES_YOLO_MODE", "")) + # Per-thread/per-task gateway session identity. # Gateway runs agent turns concurrently in executor threads, so reading a # process-global env var for session identity is racy. Keep env fallback for @@ -344,7 +349,7 @@ DANGEROUS_PATTERNS = [ # Any shell invocation via -c or combined flags like -lc, -ic, etc. (r'\b(bash|sh|zsh|ksh)\s+-[^\s]*c(\s+|$)', "shell command via -c/-lc flag"), (r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"), - (r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"), + (r'\b(curl|wget)\b.*\|\s*(?:[/\w]*/)?(?:ba)?sh(?:\s|$|-c)', "pipe remote content to shell"), (r'\b(bash|sh|zsh|ksh)\s+<\s*>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"), @@ -898,9 +903,9 @@ Respond with exactly one word: APPROVE, DENY, or ESCALATE""" answer = (response.choices[0].message.content or "").strip().upper() - if "APPROVE" in answer: + if answer == "APPROVE": return "approve" - elif "DENY" in answer: + elif answer == "DENY": return "deny" else: return "escalate" @@ -940,7 +945,7 @@ def check_dangerous_command(command: str, env_type: str, # --yolo: bypass all approval prompts. Gateway /yolo is session-scoped; # CLI --yolo remains process-scoped via the env var for local use. - if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled(): + if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled(): return {"approved": True, "message": None} is_dangerous, pattern_key, description = detect_dangerous_command(command) @@ -968,6 +973,11 @@ def check_dangerous_command(command: str, env_type: str, "approvals.cron_mode: approve in config.yaml." ), } + logger.warning( + "AUTO-APPROVED dangerous command in non-interactive non-gateway context " + "(pattern: %s): %s — set HERMES_INTERACTIVE or HERMES_GATEWAY_SESSION to require approval.", + description, command[:200], + ) return {"approved": True, "message": None} if is_gateway or env_var_enabled("HERMES_EXEC_ASK"): @@ -1076,7 +1086,7 @@ def check_all_command_guards(command: str, env_type: str, # --yolo or approvals.mode=off: bypass all approval prompts. # Gateway /yolo is session-scoped; CLI --yolo remains process-scoped. approval_mode = _get_approval_mode() - if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled() or approval_mode == "off": + if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off": return {"approved": True, "message": None} is_cli = env_var_enabled("HERMES_INTERACTIVE")