fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern (#23835)

* fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern

  - BUG-009 (CRITICAL): freeze HERMES_YOLO_MODE at module import via _YOLO_MODE_FROZEN; this prevents skills or prompt injection from setting os.environ["HERMES_YOLO_MODE"]="true" at runtime to bypass all checks.
  - BUG-002 (HIGH): replace the substring test `"APPROVE" in answer` with the exact comparison `answer == "APPROVE"` in _smart_approve; the prompt already requests exactly one word, and the substring match was exploitable via verbose LLM responses.
  - BUG-001 (MEDIUM): add logger.warning for every dangerous command that is auto-approved in a non-interactive, non-gateway context; this makes silent approvals visible in audit logs without breaking script behavior.
  - BUG-008 (LOW): expand the curl/wget pipe pattern to cover the `| /bin/bash` and `| bash -c` variants, not just `| sh` / `| bash`.

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(approval): add missing is_truthy_value import + fix yolo test patches

  _YOLO_MODE_FROZEN uses is_truthy_value() from utils — the import was missing. Tests that set HERMES_YOLO_MODE via monkeypatch.setenv() no longer work because the value is frozen at import time; update them to patch the module-level flag directly via monkeypatch.setattr().

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-07-21 16:18:55 +00:00 · 2026-05-25 07:35:33 -03:00 · 2026-05-25 07:35:33 -03:00 · 4cb3eb03c7
commit 4cb3eb03c7
parent 3ab7e2aa91
2 changed files with 20 additions and 10 deletions
--- a/tests/tools/test_yolo_mode.py
+++ b/tests/tools/test_yolo_mode.py
@@ -55,8 +55,8 @@ class TestYoloMode:
        assert not result["approved"]

    def test_dangerous_command_approved_in_yolo_mode(self, monkeypatch):
-        """With HERMES_YOLO_MODE, dangerous (non-hardline) commands are auto-approved."""
-        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        """With YOLO mode enabled, dangerous (non-hardline) commands are auto-approved."""
+        monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
        monkeypatch.setenv("HERMES_SESSION_KEY", "test-session")

@@ -68,8 +68,8 @@ class TestYoloMode:
        assert result["message"] is None

    def test_yolo_mode_works_for_all_patterns(self, monkeypatch):
-        """Yolo mode bypasses dangerous patterns (except the hardline floor)."""
-        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        """Yolo mode bypasses all dangerous patterns (except the hardline floor)."""
+        monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")

        # Dangerous but recoverable — yolo should bypass.
@@ -90,7 +90,7 @@ class TestYoloMode:

    def test_combined_guard_bypasses_yolo_mode(self, monkeypatch):
        """The new combined guard should preserve yolo bypass semantics."""
-        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")

        called = {"value": False}
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -23,6 +23,11 @@ from utils import env_var_enabled, is_truthy_value

 logger = logging.getLogger(__name__)

+# Freeze YOLO mode at module import time. Reading os.environ on every call
+# would allow any skill running inside the process to set this variable and
+# instantly bypass all approval checks — a prompt-injection escalation path.
+_YOLO_MODE_FROZEN: bool = is_truthy_value(os.getenv("HERMES_YOLO_MODE", ""))
+
 # Per-thread/per-task gateway session identity.
 # Gateway runs agent turns concurrently in executor threads, so reading a
 # process-global env var for session identity is racy. Keep env fallback for
@@ -344,7 +349,7 @@ DANGEROUS_PATTERNS = [
    # Any shell invocation via -c or combined flags like -lc, -ic, etc.
    (r'\b(bash|sh|zsh|ksh)\s+-[^\s]*c(\s+|$)', "shell command via -c/-lc flag"),
    (r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"),
-    (r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"),
+    (r'\b(curl|wget)\b.*\|\s*(?:[/\w]*/)?(?:ba)?sh(?:\s|$|-c)', "pipe remote content to shell"),
    (r'\b(bash|sh|zsh|ksh)\s+<\s*<?\s*\(\s*(curl|wget)\b', "execute remote script via process substitution"),
    (rf'\btee\b.*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via tee"),
    (rf'>>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"),
@@ -898,9 +903,9 @@ Respond with exactly one word: APPROVE, DENY, or ESCALATE"""

        answer = (response.choices[0].message.content or "").strip().upper()

-        if "APPROVE" in answer:
+        if answer == "APPROVE":
            return "approve"
-        elif "DENY" in answer:
+        elif answer == "DENY":
            return "deny"
        else:
            return "escalate"
@@ -940,7 +945,7 @@ def check_dangerous_command(command: str, env_type: str,

    # --yolo: bypass all approval prompts. Gateway /yolo is session-scoped;
    # CLI --yolo remains process-scoped via the env var for local use.
-    if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled():
+    if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled():
        return {"approved": True, "message": None}

    is_dangerous, pattern_key, description = detect_dangerous_command(command)
@@ -968,6 +973,11 @@ def check_dangerous_command(command: str, env_type: str,
                        "approvals.cron_mode: approve in config.yaml."
                    ),
                }
+        logger.warning(
+            "AUTO-APPROVED dangerous command in non-interactive non-gateway context "
+            "(pattern: %s): %s — set HERMES_INTERACTIVE or HERMES_GATEWAY_SESSION to require approval.",
+            description, command[:200],
+        )
        return {"approved": True, "message": None}

    if is_gateway or env_var_enabled("HERMES_EXEC_ASK"):
@@ -1076,7 +1086,7 @@ def check_all_command_guards(command: str, env_type: str,
    # --yolo or approvals.mode=off: bypass all approval prompts.
    # Gateway /yolo is session-scoped; CLI --yolo remains process-scoped.
    approval_mode = _get_approval_mode()
-    if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled() or approval_mode == "off":
+    if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off":
        return {"approved": True, "message": None}

    is_cli = env_var_enabled("HERMES_INTERACTIVE")