diff --git a/tests/tools/test_terminal_compound_background.py b/tests/tools/test_terminal_compound_background.py new file mode 100644 index 000000000..d8922bcf5 --- /dev/null +++ b/tests/tools/test_terminal_compound_background.py @@ -0,0 +1,180 @@ +"""Regression tests for _rewrite_compound_background. + +Context: bash parses ``A && B &`` as ``(A && B) &`` — it forks a subshell +for the compound and backgrounds the subshell. Inside the subshell, B +runs foreground, so the subshell waits for B. When B never exits on its +own (HTTP servers, ``yes > /dev/null``, etc.), the subshell is stuck in +``wait4`` forever and leaks as an orphan process. Pre-fix, we saw this +pattern leak processes across the fleet (vela, sal, combiagent). + +The rewriter fixes this by wrapping the tail in a brace group — +``A && { B & }`` — so B runs as a simple backgrounded command inside +the current shell. No subshell fork, no wait. +""" + +import pytest + +from tools.terminal_tool import _rewrite_compound_background as rewrite + + +class TestRewrites: + """Commands that trigger the subshell-wait bug MUST be rewritten.""" + + def test_simple_and_background(self): + assert rewrite("A && B &") == "A && { B & }" + + def test_or_background(self): + assert rewrite("A || B &") == "A || { B & }" + + def test_chained_and(self): + assert rewrite("A && B && C &") == "A && B && { C & }" + + def test_chained_or(self): + assert rewrite("A || B || C &") == "A || B || { C & }" + + def test_mixed_and_or(self): + assert rewrite("A && B || C &") == "A && B || { C & }" + + def test_realistic_server_start(self): + # The exact shape observed in the vela incident. + cmd = ( + "cd /home/exedev && python3 -m http.server 8000 &>/dev/null &\n" + "sleep 1\n" + 'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/' + ) + expected = ( + "cd /home/exedev && { python3 -m http.server 8000 &>/dev/null & }\n" + "sleep 1\n" + 'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/' + ) + assert rewrite(cmd) == expected + + def test_newline_resets_chain_state(self): + # A && newline starts a new statement; B & on its own line is simple. + cmd = "A && B\nC &" + assert rewrite(cmd) == "A && B\nC &" + + def test_semicolon_resets_chain_state(self): + cmd = "A && B; C &" + assert rewrite(cmd) == "A && B; C &" + + def test_pipe_resets_chain_state(self): + cmd = "A && B | C &" + assert rewrite(cmd) == "A && B | C &" + + def test_multiple_rewrites_in_one_script(self): + cmd = "A && B &\nfalse || C &" + assert rewrite(cmd) == "A && { B & }\nfalse || { C & }" + + +class TestPreserved: + """Commands that DON'T have the bug MUST pass through unchanged.""" + + def test_simple_background(self): + # No compound — just background a single command. Works fine as-is. + assert rewrite("sleep 5 &") == "sleep 5 &" + + def test_plain_server_background(self): + assert rewrite("python3 -m http.server 0 &") == "python3 -m http.server 0 &" + + def test_semicolon_sequence(self): + assert rewrite("cd /tmp; start-server &") == "cd /tmp; start-server &" + + def test_no_trailing_ampersand(self): + assert rewrite("A && B") == "A && B" + + def test_no_chain_at_all(self): + assert rewrite("echo hello") == "echo hello" + + def test_empty_string(self): + assert rewrite("") == "" + + def test_whitespace_only(self): + assert rewrite(" \n\t") == " \n\t" + + +class TestRedirectsNotConfused: + """``&>``, ``2>&1``, ``>&2`` must not be mistaken for background ``&``.""" + + def test_amp_gt_redirect_alone(self): + assert rewrite("echo hi &>/dev/null") == "echo hi &>/dev/null" + + def test_fd_to_fd_redirect(self): + assert rewrite("cmd 2>&1") == "cmd 2>&1" + + def test_fd_redirect_with_trailing_bg(self): + # 2>&1 is redirect; trailing & is simple bg (no compound). + assert rewrite("cmd 2>&1 &") == "cmd 2>&1 &" + + def test_amp_gt_inside_compound_background(self): + # &> should be preserved; the trailing & still needs wrapping. + cmd = "A && B &>/dev/null &" + assert rewrite(cmd) == "A && { B &>/dev/null & }" + + def test_gt_amp_inside_compound(self): + cmd = "A && B 2>&1 &" + assert rewrite(cmd) == "A && { B 2>&1 & }" + + +class TestQuotingAndParens: + """Shell metacharacters inside quotes/parens must not be parsed as operators.""" + + def test_and_and_inside_single_quotes(self): + cmd = "echo 'A && B &'" + assert rewrite(cmd) == "echo 'A && B &'" + + def test_and_and_inside_double_quotes(self): + cmd = 'echo "A && B &"' + assert rewrite(cmd) == 'echo "A && B &"' + + def test_parenthesised_subshell_left_alone(self): + # `(A && B) &` has the same bug class but isn't the common agent + # pattern. Leave for a follow-up; do not rewrite and do not + # misrewrite content inside the parens. + assert rewrite("(A && B) &") == "(A && B) &" + + def test_command_substitution_not_rewritten(self): + # $(A && B) is command substitution; the `&&` inside is a compound + # expression in the subshell, unrelated to the outer `&`. + cmd = 'echo "$(A && B)" &' + assert rewrite(cmd) == 'echo "$(A && B)" &' + + def test_backslash_escaped_ampersand(self): + # Escaped & is not a background operator. + cmd = r"echo A \&\& B" + assert rewrite(cmd) == cmd + + def test_comment_line_not_rewritten(self): + cmd = "# A && B &\nC" + assert rewrite(cmd) == "# A && B &\nC" + + +class TestIdempotence: + """Running the rewriter twice should be a no-op on its own output.""" + + def test_already_rewritten(self): + once = rewrite("A && B &") + twice = rewrite(once) + assert once == twice + assert twice == "A && { B & }" + + def test_multiline_idempotent(self): + once = rewrite("cd /tmp && server &\nsleep 1") + assert rewrite(once) == once + + +class TestEdgeCases: + def test_only_chain_op_no_second_command(self): + # Malformed input: bash would error, we shouldn't crash or rewrite. + cmd = "A && &" + # Don't assert a specific output; just don't raise. + rewrite(cmd) + + def test_only_trailing_ampersand(self): + assert rewrite("&") == "&" + + def test_leading_whitespace(self): + assert rewrite(" A && B &") == " A && { B & }" + + def test_tabs_between_tokens(self): + assert rewrite("A\t&&\tB\t&") == "A\t&&\t{ B\t& }" diff --git a/tools/environments/base.py b/tools/environments/base.py index cde78e1d4..19a637901 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -705,6 +705,13 @@ class BaseEnvironment(ABC): self._before_execute() exec_command, sudo_stdin = self._prepare_command(command) + # Guard against the `A && B &` subshell-wait trap: bash forks a + # subshell for the compound that then waits for an infinite B (a + # server, `yes > /dev/null`, etc.), leaking the subshell forever. + # Rewriting to `A && { B & }` runs B as a plain background in the + # current shell — no subshell wait. + from tools.terminal_tool import _rewrite_compound_background + exec_command = _rewrite_compound_background(exec_command) effective_timeout = timeout or self.timeout effective_cwd = cwd or self.cwd diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index b8b69856d..6a69a3b83 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -442,6 +442,171 @@ def _rewrite_real_sudo_invocations(command: str) -> tuple[str, bool]: return "".join(out), found +def _rewrite_compound_background(command: str) -> str: + """Wrap `A && B &` (or `A || B &`) to `A && { B & }` at depth 0. + + Bash parses ``A && B &`` with `&&` tighter than `&`, so it forks a + subshell for the whole `A && B` compound and backgrounds it. Inside + the subshell, `B` runs foreground, so the subshell waits for `B` to + finish. When `B` is a long-running process (`python3 -m http.server`, + `yes > /dev/null`, anything that doesn't naturally exit), the subshell + never exits. It leaks as a process stuck in ``wait4`` forever — and + on the way, its open stdout pipe can prevent the terminal tool from + returning promptly. + + Rewriting the tail to `A && { B & }` preserves `&&`'s error semantics + (skip B if A fails) while replacing the subshell with a brace group. + The brace group runs in the current shell (no fork), backgrounds B as + a simple command (bash doesn't wait for it in non-interactive mode), + and exits immediately. B runs as a normal backgrounded child, orphaned + when the parent shell exits. + + Handles redirects (``&>``, ``2>&1``) and skips content inside quoted + strings and parenthesised subshells. Leaves simple ``cmd &`` alone — + that construct doesn't have the subshell-wait bug. + """ + n = len(command) + i = 0 + paren_depth = 0 + brace_depth = 0 + # Position in *command* just after the most recent `&&` / `||` at depth 0 + # in the current statement; -1 when no chain operator is active. + last_chain_op_end = -1 + rewrites: list[tuple[int, int]] = [] # (chain_op_end, amp_pos) + + while i < n: + ch = command[i] + + # Newline terminates a statement at depth 0 — reset chain state. + # Checked before the whitespace skip so we don't miss it. + if ch == "\n" and paren_depth == 0 and brace_depth == 0: + last_chain_op_end = -1 + i += 1 + continue + + if ch.isspace(): + i += 1 + continue + + # Comments (only at statement start — conservative: any `#` not inside + # a token ends the line). `_read_shell_token` handles quoted strings + # below so `#` inside quotes is safe. + if ch == "#": + nl = command.find("\n", i) + if nl == -1: + break + i = nl + continue + + if ch == "\\" and i + 1 < n: + i += 2 + continue + + # Quoted tokens — consume whole string via the shared tokenizer. + if ch in ("'", '"'): + _, next_i = _read_shell_token(command, i) + i = max(next_i, i + 1) + continue + + if ch == "(": + paren_depth += 1 + i += 1 + continue + + if ch == ")": + paren_depth = max(0, paren_depth - 1) + i += 1 + continue + + # Brace groups: `{ ... }` is a group (no subshell fork), and bash + # requires whitespace after `{`. We track depth so already-rewritten + # output (`A && { B & }`) is idempotent — the inner `&` is part of + # the group, not a new compound to rewrite. Also skip content inside + # the group since `A && B &` there is separately well-formed. + if ch == "{" and i + 1 < n and (command[i + 1].isspace() or command[i + 1] == "\n"): + brace_depth += 1 + i += 1 + continue + if ch == "}" and brace_depth > 0: + brace_depth -= 1 + # Closing a group completes a compound statement; reset chain. + last_chain_op_end = -1 + i += 1 + continue + + # Inside parens or brace groups, skip operators — they parse in their + # own scope. `(...)` subshells have the same bug class but are not the + # common agent pattern; leave for a follow-up. + if paren_depth > 0 or brace_depth > 0: + i += 1 + continue + + # Chain operators at depth 0 + if command.startswith("&&", i) or command.startswith("||", i): + last_chain_op_end = i + 2 + i += 2 + continue + + # Statement terminators reset the chain state + if ch == ";": + last_chain_op_end = -1 + i += 1 + continue + + # Single `|` (pipe) starts a new pipeline stage; don't rewrite + # across it. `||` handled above. + if ch == "|": + last_chain_op_end = -1 + i += 1 + continue + + # `&` handling: distinguish `&&`, `&>`, fd redirect (`>&`, `<&`), + # and a true backgrounding `&`. + if ch == "&": + # `&&` handled above; won't reach here + if i + 1 < n and command[i + 1] == ">": + # `&>` redirect — consume + i += 2 + continue + # `>&` / `<&` fd target — look back past whitespace + j = i - 1 + while j >= 0 and command[j].isspace(): + j -= 1 + if j >= 0 and command[j] in "<>": + i += 1 + continue + # Real background operator + if last_chain_op_end >= 0: + rewrites.append((last_chain_op_end, i)) + last_chain_op_end = -1 + i += 1 + continue + + # Regular unquoted token — advance past it via the shared tokenizer + _, next_i = _read_shell_token(command, i) + i = max(next_i, i + 1) + + if not rewrites: + return command + + # Apply rewrites back-to-front so earlier indices remain valid. + result = command + for chain_end, amp_pos in reversed(rewrites): + # Skip whitespace right after the `&&`/`||` so the brace group + # opens flush against the inner command. + insert_pos = chain_end + while insert_pos < amp_pos and result[insert_pos].isspace(): + insert_pos += 1 + prefix = result[:insert_pos] + middle = result[insert_pos:amp_pos] # inner command + trailing space + suffix = result[amp_pos + 1 :] + # `{` needs a trailing space in bash; the closing `}` needs to be + # preceded by `;` or `&` — we're providing `&` from the backgrounding. + result = prefix + "{ " + middle + "& }" + suffix + + return result + + def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None]: """ Transform sudo commands to use -S flag if SUDO_PASSWORD is available.