diff --git a/scripts/install.sh b/scripts/install.sh index 141a3e290ed..864f9e07584 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -1886,26 +1886,98 @@ strip_snap_browser_override() { } run_browser_install_with_timeout() { + run_with_timeout "$@" +} + +# Run a command with a hard wall-clock timeout, returning non-zero if it is +# killed. Prefers GNU coreutils `timeout` (Linux) or `gtimeout` (macOS via +# Homebrew) for an external-command target; otherwise (and always for a shell +# function target, which the `timeout` binary cannot exec) it uses a pure-shell +# watchdog: launch the command in its own process group, poll until it finishes, +# and SIGTERM (then SIGKILL) the whole group on timeout. The pure-shell path is +# what protects the bug-#39219 case — a stalled Electron download on macOS, +# where `timeout` is usually absent — turning an indefinite hang into a non-zero +# exit so callers (install_desktop) can self-heal via the mirror fallback. +# +# $1 (timeout) must be a bare integer number of seconds — the pure-shell loop +# compares it arithmetically (the `timeout` binary would also accept suffixes +# like 15m, but we normalize so both paths share one contract). On timeout the +# return code is 124, matching GNU `timeout`. +run_with_timeout() { local timeout_seconds="$1" shift - if command -v timeout >/dev/null 2>&1; then - # GNU `timeout` runs the command in its own process group, so a terminal - # Ctrl+C is delivered to `timeout` but never reaches the child — the - # download looks frozen and ignores Ctrl+C (#35166). `--foreground` - # keeps the command in the shell's foreground group so Ctrl+C reaches - # it; `-k 10` sends SIGKILL 10s after the deadline so a wedged download - # can't outlive the timeout. Both flags are GNU-only — probe once and - # fall back to plain `timeout` on BusyBox (Alpine), and to direct exec - # when `timeout` is absent (stock macOS, where Ctrl+C works natively). - if timeout --foreground -k 10 1 true >/dev/null 2>&1; then - timeout --foreground -k 10 "$timeout_seconds" "$@" - else - timeout "$timeout_seconds" "$@" + # Normalize to a bare integer; fall back to the desktop default if a caller + # ever passes a suffixed/empty value (the pure-shell loop needs an int). + case "$timeout_seconds" in + ''|*[!0-9]*) timeout_seconds=900 ;; + esac + + # The `timeout` binary can only exec an external command, not a shell + # function. Use it only when the target is NOT a function; functions always + # go through the pure-shell watchdog (which runs them in a subshell of the + # current shell and sees them directly — no fragile env export needed). + if [ "$(type -t "$1" 2>/dev/null)" != "function" ]; then + local timeout_bin="" + if command -v timeout >/dev/null 2>&1; then + timeout_bin="timeout" + elif command -v gtimeout >/dev/null 2>&1; then + timeout_bin="gtimeout" + fi + if [ -n "$timeout_bin" ]; then + # GNU `timeout` runs the command in its own process group, so a + # terminal Ctrl+C is delivered to `timeout` but never reaches the + # child — the download looks frozen and ignores Ctrl+C (#35166). + # `--foreground` keeps the command in the shell's foreground group + # so Ctrl+C reaches it; `-k 10` sends SIGKILL 10s after the deadline + # so a wedged download can't outlive the timeout. Both flags are + # GNU-only — probe once and fall back to plain `timeout` on BusyBox + # (Alpine). When neither binary exists (stock macOS) we drop to the + # pure-shell watchdog below. + if "$timeout_bin" --foreground -k 10 1 true >/dev/null 2>&1; then + "$timeout_bin" --foreground -k 10 "$timeout_seconds" "$@" + else + "$timeout_bin" "$timeout_seconds" "$@" + fi + return $? fi - else - "$@" fi + + # Pure-shell fallback: run in a new process group so we can kill the whole + # subtree (npm spawns node + the Electron downloader as children). + set -m + ( "$@" ) & + local cmd_pid=$! + set +m + + local waited=0 + local rc + while [ "$waited" -lt "$timeout_seconds" ]; do + if ! kill -0 "$cmd_pid" 2>/dev/null; then + # `|| rc=$?` keeps the non-zero child status without letting `set -e` + # abort the caller here (this would fire if run_with_timeout were + # ever called outside an if/|| context). + rc=0; wait "$cmd_pid" 2>/dev/null || rc=$? + return "$rc" + fi + sleep 1 + waited=$((waited + 1)) + done + + # Final boundary recheck: the command may have finished during the last + # poll interval — don't kill (and mislabel as 124) a process that already + # exited cleanly in the last second of the budget. + if ! kill -0 "$cmd_pid" 2>/dev/null; then + rc=0; wait "$cmd_pid" 2>/dev/null || rc=$? + return "$rc" + fi + + # Timed out: kill the process group (negative PID), escalate to KILL. + kill -TERM "-$cmd_pid" 2>/dev/null || kill -TERM "$cmd_pid" 2>/dev/null || true + sleep 2 + kill -KILL "-$cmd_pid" 2>/dev/null || kill -KILL "$cmd_pid" 2>/dev/null || true + wait "$cmd_pid" 2>/dev/null || true + return 124 } # Return success only when the host is an apt release NEWER than the newest one @@ -2048,8 +2120,10 @@ install_node_deps() { if [ -f "$INSTALL_DIR/package.json" ]; then log_info "Installing Node.js dependencies (browser tools)..." cd "$INSTALL_DIR" - npm install --silent 2>/dev/null || { - log_warn "npm install failed (browser tools may not work)" + # Time-boxed: a stalled registry fetch would otherwise hang here with no + # progress (same #39219 stall class as the desktop build below). + run_with_timeout "$NODE_DEPS_TIMEOUT" npm install --silent || { + log_warn "npm install failed or timed out (browser tools may not work)" } log_success "Node.js dependencies installed" @@ -2149,8 +2223,9 @@ install_node_deps() { if [ -f "$INSTALL_DIR/ui-tui/package.json" ]; then log_info "Installing TUI dependencies..." cd "$INSTALL_DIR/ui-tui" - npm install --silent 2>/dev/null || { - log_warn "TUI npm install failed (hermes --tui may not work)" + # Time-boxed: a stalled registry fetch would otherwise hang here (#39219). + run_with_timeout "$NODE_DEPS_TIMEOUT" npm install --silent || { + log_warn "TUI npm install failed or timed out (hermes --tui may not work)" } log_success "TUI dependencies installed" fi @@ -2392,11 +2467,13 @@ ensure_browser() { log_info "Installing agent-browser..." local log_file log_file="$(mktemp)" - if ! "$npm_bin" install -g --prefix "$HERMES_HOME/node" --silent --ignore-scripts \ + # Time-boxed (#39219): a stalled npm registry fetch here would otherwise + # hang the installer with no progress, same class as the desktop build. + if ! run_with_timeout "$NODE_DEPS_TIMEOUT" "$npm_bin" install -g --prefix "$HERMES_HOME/node" --silent --ignore-scripts \ "agent-browser@^0.26.0" \ "@askjo/camofox-browser@^1.5.2" \ >"$log_file" 2>&1; then - log_error "npm install failed:" + log_error "npm install failed or timed out:" cat "$log_file" >&2 rm -f "$log_file" return 1 @@ -2581,6 +2658,18 @@ _desktop_pack() { # Last-resort Electron mirror after GitHub download fails (#47266). DESKTOP_ELECTRON_FALLBACK_MIRROR="https://npmmirror.com/mirrors/electron/" +# Per-attempt wall-clock cap for the desktop npm install / electron-builder pack +# (#39219). A stalled (not failed) Electron download on a throttled/blocked link +# never returns, so without this the installer hangs forever on "Build desktop +# app". 900s is generous enough for a slow-but-progressing ~150MB fetch + build; +# override with DESKTOP_BUILD_TIMEOUT for very slow links. +DESKTOP_BUILD_TIMEOUT="${DESKTOP_BUILD_TIMEOUT:-900}" + +# Wall-clock cap for the plain registry `npm install`s (browser-tools + TUI +# deps). Same #39219 stall class but no ~150MB Electron binary, so a shorter +# default; override with NODE_DEPS_TIMEOUT for very slow links. +NODE_DEPS_TIMEOUT="${NODE_DEPS_TIMEOUT:-600}" + # Electron package dir — workspace-local nest first, then root hoist. _electron_dir() { local install_dir="$1" @@ -2678,8 +2767,28 @@ install_desktop() { # flake) — leaving tsc/typescript unresolved and `npm run pack`'s # `tsc -b` failing with no obvious cause. Fall back to `npm install` # only if `npm ci` is unavailable or the lockfile is out of sync. + # + # Both the install and the build below are wrapped in a hard wall-clock + # timeout (#39219): the Electron binary (~150MB) is fetched from GitHub, + # and on a throttled/blocked connection that download can *stall* — npm + # neither errors nor exits, so the installer sits on "Build desktop app" + # forever with only `npm warn deprecated` lines visible. A stall now + # converts to a non-zero exit, which feeds the existing self-heal / + # mirror-fallback escalation instead of hanging the whole install. + # + # The `npm ci` and its `npm install` fallback SHARE one budget: a stalled + # link wedges both identically, so giving each a full DESKTOP_BUILD_TIMEOUT + # would double the worst-case hang. We compute a single deadline and pass + # the remaining seconds to the fallback (min 30s so it still gets a real + # attempt if `npm ci` failed fast rather than stalling). log_info "Installing desktop workspace dependencies (includes Electron ~150MB, 1-3min)..." - if ( cd "$INSTALL_DIR" && npm ci ) || ( cd "$INSTALL_DIR" && npm install ); then + local _deps_start _deps_remaining + _deps_start=$(date +%s) + if run_with_timeout "$DESKTOP_BUILD_TIMEOUT" bash -c 'cd "$1" && npm ci' _ "$INSTALL_DIR"; then + log_success "Desktop workspace dependencies installed" + elif _deps_remaining=$(( DESKTOP_BUILD_TIMEOUT - ($(date +%s) - _deps_start) )); \ + [ "$_deps_remaining" -lt 30 ] && _deps_remaining=30; \ + run_with_timeout "$_deps_remaining" bash -c 'cd "$1" && npm install' _ "$INSTALL_DIR"; then log_success "Desktop workspace dependencies installed" elif _electron_pkg_staged_missing_dist "$INSTALL_DIR"; then log_warn "Desktop dependency install failed with a missing Electron dist; attempting self-heal..." @@ -2708,7 +2817,7 @@ install_desktop() { # the GitHub-blocked/throttled case (the repeating "retrying" log). log_info "Building desktop app (this takes 1-3 minutes)..." local pack_ok=false - if _desktop_pack "$desktop_dir"; then + if run_with_timeout "$DESKTOP_BUILD_TIMEOUT" _desktop_pack "$desktop_dir"; then pack_ok=true else local purged="" @@ -2719,7 +2828,7 @@ install_desktop() { fi if [ "$restored" = true ]; then log_warn "Desktop build failed; refreshed the Electron download and retrying once..." - if _desktop_pack "$desktop_dir"; then + if run_with_timeout "$DESKTOP_BUILD_TIMEOUT" _desktop_pack "$desktop_dir"; then pack_ok=true fi fi @@ -2731,7 +2840,7 @@ install_desktop() { log_warn "Re-downloading Electron via a public mirror ($DESKTOP_ELECTRON_FALLBACK_MIRROR), then rebuilding..." log_warn " (set ELECTRON_MIRROR yourself to use a different/trusted mirror)" _electron_dist_ok "$INSTALL_DIR" || _restore_electron_dist "$INSTALL_DIR" "$DESKTOP_ELECTRON_FALLBACK_MIRROR" || true - if _desktop_pack "$desktop_dir" "$DESKTOP_ELECTRON_FALLBACK_MIRROR"; then + if run_with_timeout "$DESKTOP_BUILD_TIMEOUT" _desktop_pack "$desktop_dir" "$DESKTOP_ELECTRON_FALLBACK_MIRROR"; then pack_ok=true fi fi