hermes-agent/tests/tools/test_terminal_compound_background.py
handsdiff abfc1847b7 fix(terminal): rewrite A && B & to A && { B & } to prevent subshell leak
bash parses `A && B &` with `&&` tighter than `&`, so it forks a subshell
for the compound and backgrounds the subshell. Inside the subshell, B
runs foreground, so the subshell waits for B. When B is a process that
doesn't naturally exit (`python3 -m http.server`, `yes > /dev/null`, a
long-running daemon), the subshell is stuck in `wait4` forever and leaks
as an orphan reparented to init.

Observed in production: agents running `cd X && python3 -m http.server
8000 &>/dev/null & sleep 1 && curl ...` as a "start a local server, then
verify it" one-liner. Outer bash exits cleanly; the subshell never does.
Across ~3 days of use, 8 unique stuck-terminal events and 7 leaked
bash+server pairs accumulated on the fleet, with some sessions appearing
hung from the user's perspective because the subshell's open stdout pipe
kept the terminal tool's drain thread blocked.

This is distinct from the `set +m` fix in 933fbd8f (which addressed
interactive-shell job-control waiting at exit). `set +m` doesn't help
here because `bash -c` is non-interactive and job control is already
off; the problem is the subshell's own internal wait for its foreground
B, not the outer shell's job-tracking.

The fix: walk the command shell-aware (respecting quotes, parens, brace
groups, `&>`/`>&` redirects), find `A && B &` / `A || B &` at depth 0
and rewrite the tail to `A && { B & }`. Brace groups don't fork a
subshell — they run in the current shell. `B &` inside the group is a
simple background (no subshell wait). The outer `&` is absorbed into
the group, so the compound no longer needs an explicit subshell.

`&&` error-propagation is preserved exactly: if A fails, `&&`
short-circuits and B never runs.

- Skips quoted strings, comment lines, and `(…)` subshells
- Handles `&>/dev/null`, `2>&1`, `>&2` without mistaking them for `&`
- Resets chain state at `;`, `|`, and newlines
- Tracks brace depth so already-rewritten output is idempotent
- Walks using the existing `_read_shell_token` tokenizer, matching the
  pattern of `_rewrite_real_sudo_invocations`

Called once from `BaseEnvironment.execute` right after
`_prepare_command`, so it runs for every backend (local, ssh, docker,
modal, etc.) with no per-backend plumbing.

34 new tests covering rewrite cases, preservation cases, redirect
edge-cases, quoting/parens/backticks, idempotency, and empty/edge
inputs. End-to-end verified on a test VM: the exact vela-incident
command now returns in ~1.3s with no leaked bash, only the intentional
backgrounded server.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 16:53:11 -07:00

180 lines
6.2 KiB
Python

"""Regression tests for _rewrite_compound_background.
Context: bash parses ``A && B &`` as ``(A && B) &`` — it forks a subshell
for the compound and backgrounds the subshell. Inside the subshell, B
runs foreground, so the subshell waits for B. When B never exits on its
own (HTTP servers, ``yes > /dev/null``, etc.), the subshell is stuck in
``wait4`` forever and leaks as an orphan process. Pre-fix, we saw this
pattern leak processes across the fleet (vela, sal, combiagent).
The rewriter fixes this by wrapping the tail in a brace group —
``A && { B & }`` — so B runs as a simple backgrounded command inside
the current shell. No subshell fork, no wait.
"""
import pytest
from tools.terminal_tool import _rewrite_compound_background as rewrite
class TestRewrites:
"""Commands that trigger the subshell-wait bug MUST be rewritten."""
def test_simple_and_background(self):
assert rewrite("A && B &") == "A && { B & }"
def test_or_background(self):
assert rewrite("A || B &") == "A || { B & }"
def test_chained_and(self):
assert rewrite("A && B && C &") == "A && B && { C & }"
def test_chained_or(self):
assert rewrite("A || B || C &") == "A || B || { C & }"
def test_mixed_and_or(self):
assert rewrite("A && B || C &") == "A && B || { C & }"
def test_realistic_server_start(self):
# The exact shape observed in the vela incident.
cmd = (
"cd /home/exedev && python3 -m http.server 8000 &>/dev/null &\n"
"sleep 1\n"
'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/'
)
expected = (
"cd /home/exedev && { python3 -m http.server 8000 &>/dev/null & }\n"
"sleep 1\n"
'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/'
)
assert rewrite(cmd) == expected
def test_newline_resets_chain_state(self):
# A && newline starts a new statement; B & on its own line is simple.
cmd = "A && B\nC &"
assert rewrite(cmd) == "A && B\nC &"
def test_semicolon_resets_chain_state(self):
cmd = "A && B; C &"
assert rewrite(cmd) == "A && B; C &"
def test_pipe_resets_chain_state(self):
cmd = "A && B | C &"
assert rewrite(cmd) == "A && B | C &"
def test_multiple_rewrites_in_one_script(self):
cmd = "A && B &\nfalse || C &"
assert rewrite(cmd) == "A && { B & }\nfalse || { C & }"
class TestPreserved:
"""Commands that DON'T have the bug MUST pass through unchanged."""
def test_simple_background(self):
# No compound — just background a single command. Works fine as-is.
assert rewrite("sleep 5 &") == "sleep 5 &"
def test_plain_server_background(self):
assert rewrite("python3 -m http.server 0 &") == "python3 -m http.server 0 &"
def test_semicolon_sequence(self):
assert rewrite("cd /tmp; start-server &") == "cd /tmp; start-server &"
def test_no_trailing_ampersand(self):
assert rewrite("A && B") == "A && B"
def test_no_chain_at_all(self):
assert rewrite("echo hello") == "echo hello"
def test_empty_string(self):
assert rewrite("") == ""
def test_whitespace_only(self):
assert rewrite(" \n\t") == " \n\t"
class TestRedirectsNotConfused:
"""``&>``, ``2>&1``, ``>&2`` must not be mistaken for background ``&``."""
def test_amp_gt_redirect_alone(self):
assert rewrite("echo hi &>/dev/null") == "echo hi &>/dev/null"
def test_fd_to_fd_redirect(self):
assert rewrite("cmd 2>&1") == "cmd 2>&1"
def test_fd_redirect_with_trailing_bg(self):
# 2>&1 is redirect; trailing & is simple bg (no compound).
assert rewrite("cmd 2>&1 &") == "cmd 2>&1 &"
def test_amp_gt_inside_compound_background(self):
# &> should be preserved; the trailing & still needs wrapping.
cmd = "A && B &>/dev/null &"
assert rewrite(cmd) == "A && { B &>/dev/null & }"
def test_gt_amp_inside_compound(self):
cmd = "A && B 2>&1 &"
assert rewrite(cmd) == "A && { B 2>&1 & }"
class TestQuotingAndParens:
"""Shell metacharacters inside quotes/parens must not be parsed as operators."""
def test_and_and_inside_single_quotes(self):
cmd = "echo 'A && B &'"
assert rewrite(cmd) == "echo 'A && B &'"
def test_and_and_inside_double_quotes(self):
cmd = 'echo "A && B &"'
assert rewrite(cmd) == 'echo "A && B &"'
def test_parenthesised_subshell_left_alone(self):
# `(A && B) &` has the same bug class but isn't the common agent
# pattern. Leave for a follow-up; do not rewrite and do not
# misrewrite content inside the parens.
assert rewrite("(A && B) &") == "(A && B) &"
def test_command_substitution_not_rewritten(self):
# $(A && B) is command substitution; the `&&` inside is a compound
# expression in the subshell, unrelated to the outer `&`.
cmd = 'echo "$(A && B)" &'
assert rewrite(cmd) == 'echo "$(A && B)" &'
def test_backslash_escaped_ampersand(self):
# Escaped & is not a background operator.
cmd = r"echo A \&\& B"
assert rewrite(cmd) == cmd
def test_comment_line_not_rewritten(self):
cmd = "# A && B &\nC"
assert rewrite(cmd) == "# A && B &\nC"
class TestIdempotence:
"""Running the rewriter twice should be a no-op on its own output."""
def test_already_rewritten(self):
once = rewrite("A && B &")
twice = rewrite(once)
assert once == twice
assert twice == "A && { B & }"
def test_multiline_idempotent(self):
once = rewrite("cd /tmp && server &\nsleep 1")
assert rewrite(once) == once
class TestEdgeCases:
def test_only_chain_op_no_second_command(self):
# Malformed input: bash would error, we shouldn't crash or rewrite.
cmd = "A && &"
# Don't assert a specific output; just don't raise.
rewrite(cmd)
def test_only_trailing_ampersand(self):
assert rewrite("&") == "&"
def test_leading_whitespace(self):
assert rewrite(" A && B &") == " A && { B & }"
def test_tabs_between_tokens(self):
assert rewrite("A\t&&\tB\t&") == "A\t&&\t{ B\t& }"