From 1a4010edf5429a1cbd9bc4bafe0798a16d232d57 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 6 Jun 2026 08:27:12 -0700
Subject: [PATCH] test(approval): regression for shell-escape denylist bypass (#36846, #36847)

---
 .../test_managed_browserbase_and_modal.py     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index f705380991c..96ab53ae090 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -593,3 +593,27 @@ def test_terminal_tool_respects_direct_modal_mode_without_falling_back_to_manage
             },
             task_id="task-modal-direct-only",
         )
+
+
+class TestShellEscapeBypass:
+    """Regression for #36846/#36847: backslash escapes and empty-string
+    literals split tokens so a denylisted command (rm) slips past detection
+    while the shell still executes it."""
+
+    def test_backslash_escape_bypass_caught(self):
+        from tools.approval import detect_dangerous_command
+        # literal: r-backslash-m -rf / (shell collapses r\m -> rm)
+        assert detect_dangerous_command("r\\m -rf /")[0] is True
+
+    def test_empty_string_literal_bypass_caught(self):
+        from tools.approval import detect_dangerous_command
+        assert detect_dangerous_command("r''m -rf /")[0] is True
+        assert detect_dangerous_command('r""m -rf /')[0] is True
+
+    def test_plain_dangerous_still_caught(self):
+        from tools.approval import detect_dangerous_command
+        assert detect_dangerous_command("rm -rf /")[0] is True
+
+    def test_benign_command_not_flagged(self):
+        from tools.approval import detect_dangerous_command
+        assert detect_dangerous_command("ls -la")[0] is False