fix(hermes): heal broken managed Node tree instead of PATH fallback

When a Hermes-managed node/npm/npx shim exists but fails --version, redownload
the pinned nodejs.org bundle under HERMES_HOME/node and retry. Do not fall
back to system npm on PATH when a managed tree is present.

POSIX heal probes node, npm, and npx (npm can break while node still runs).
This commit is contained in:
xxxigm 2026-06-26 21:02:24 +07:00 committed by kshitij
parent 3c5bcd3eee
commit 65be0061e0
4 changed files with 260 additions and 14 deletions

View file

@ -277,6 +277,11 @@ def _candidate_node_command_names(command: str) -> list[str]:
return [f"{base}.cmd", f"{base}.exe", base]
_HERMES_NODE_TARGET_MAJOR = int(os.environ.get("HERMES_NODE_TARGET_MAJOR", "22"))
_managed_node_heal_attempted = False
_NODE_BOOTSTRAP_SCRIPT = Path(__file__).resolve().parent / "scripts" / "lib" / "node-bootstrap.sh"
def node_tool_runnable(path: str | None) -> bool:
"""Return True only when *path* is a Node/npm/npx binary that actually runs.
@ -285,10 +290,10 @@ def node_tool_runnable(path: str | None) -> bool:
``bin/npm`` behind while ``lib/cli.js`` is missing the wrapper exists but
immediately throws ``MODULE_NOT_FOUND``. ``find_hermes_node_executable``
used to trust file presence alone, so ``hermes update`` would pick that
broken npm and skip the healthy system one on PATH.
broken npm and fail the Node refresh / web UI build.
Probe with ``--version`` (same pattern as :func:`agent_browser_runnable`) so
callers can fall through to the next resolution candidate.
broken managed wrappers are detected before use.
"""
if not path:
return False
@ -313,9 +318,126 @@ def node_tool_runnable(path: str | None) -> bool:
return result.returncode == 0
def hermes_managed_node_tree_present(home: Path | None = None) -> bool:
"""Return True when any Hermes-managed node/npm/npx shim exists on disk."""
names = set()
for command in ("node", "npm", "npx"):
names.update(_candidate_node_command_names(command))
for directory in iter_hermes_node_dirs(home):
for name in names:
candidate = directory / name
if candidate.is_file() and (
sys.platform == "win32" or os.access(candidate, os.X_OK)
):
return True
return False
def _heal_managed_node_windows() -> bool:
"""Redownload the portable Node zip into ``%HERMES_HOME%\\node`` on Windows."""
import re
import tempfile
import urllib.request
import zipfile
arch = (os.environ.get("PROCESSOR_ARCHITEW6432") or os.environ.get("PROCESSOR_ARCHITECTURE", "")).lower()
if arch in ("amd64", "x86_64"):
node_arch = "x64"
elif arch == "arm64":
node_arch = "arm64"
elif arch in ("x86",):
node_arch = "x86"
else:
return False
home = get_hermes_home()
index_url = f"https://nodejs.org/dist/latest-v{_HERMES_NODE_TARGET_MAJOR}.x/"
try:
with urllib.request.urlopen(index_url, timeout=60) as response:
index_html = response.read().decode("utf-8", errors="replace")
except OSError:
return False
match = re.search(
rf"node-v{_HERMES_NODE_TARGET_MAJOR}\.\d+\.\d+-win-{node_arch}\.zip",
index_html,
)
if not match:
return False
zip_name = match.group(0)
download_url = f"{index_url}{zip_name}"
try:
with urllib.request.urlopen(download_url, timeout=300) as response:
zip_bytes = response.read()
except OSError:
return False
try:
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
zip_path = tmp_path / zip_name
zip_path.write_bytes(zip_bytes)
extract_dir = tmp_path / "extract"
extract_dir.mkdir()
with zipfile.ZipFile(zip_path) as archive:
archive.extractall(extract_dir)
extracted = next(extract_dir.glob("node-v*"), None)
if extracted is None or not extracted.is_dir():
return False
target = home / "node"
if target.exists():
shutil.rmtree(target)
shutil.move(str(extracted), str(target))
except OSError:
return False
return node_tool_runnable(str(target / "node.exe"))
def heal_hermes_managed_node() -> bool:
"""Redownload Hermes-managed Node when the tree exists but is broken.
Runs at most once per process. POSIX installs shell out to
``heal_managed_node`` in ``scripts/lib/node-bootstrap.sh``; Windows
downloads the portable zip directly (same source as ``install.ps1``).
"""
global _managed_node_heal_attempted
if _managed_node_heal_attempted:
return False
if not hermes_managed_node_tree_present():
return False
_managed_node_heal_attempted = True
if sys.platform == "win32":
return _heal_managed_node_windows()
if not _NODE_BOOTSTRAP_SCRIPT.is_file():
return False
import subprocess
try:
result = subprocess.run(
[
"bash",
"-c",
f'source "{_NODE_BOOTSTRAP_SCRIPT}" && heal_managed_node',
],
env={**os.environ, "HERMES_HOME": str(get_hermes_home())},
capture_output=True,
timeout=300,
check=False,
)
except (OSError, subprocess.SubprocessError):
return False
return result.returncode == 0
def find_hermes_node_executable(command: str) -> str | None:
"""Return a Hermes-managed Node/npm executable path, if installed and runnable."""
"""Return a Hermes-managed Node/npm executable path, healing broken trees."""
names = _candidate_node_command_names(command)
broken_present = False
for directory in iter_hermes_node_dirs():
for name in names:
candidate = directory / name
@ -325,6 +447,17 @@ def find_hermes_node_executable(command: str) -> str | None:
resolved = str(candidate)
if node_tool_runnable(resolved):
return resolved
broken_present = True
if broken_present and heal_hermes_managed_node():
for directory in iter_hermes_node_dirs():
for name in names:
candidate = directory / name
if candidate.is_file() and (
sys.platform == "win32" or os.access(candidate, os.X_OK)
):
resolved = str(candidate)
if node_tool_runnable(resolved):
return resolved
return None
@ -360,11 +493,16 @@ def find_node_executable(command: str) -> str | None:
"""Resolve a Node.js command, preferring healthy Hermes-managed installs.
This is for Hermes-owned subprocesses that should not be broken by a bad,
missing, or elevation-triggering system Node/npm on PATH. A managed tree
that exists on disk but fails a ``--version`` probe is skipped so PATH can
supply a working system install instead.
missing, or elevation-triggering system Node/npm on PATH. When a managed
tree exists but cannot be healed, returns ``None`` instead of falling back
to system npm on PATH.
"""
return find_hermes_node_executable(command) or find_node_executable_on_path(command)
managed = find_hermes_node_executable(command)
if managed:
return managed
if hermes_managed_node_tree_present():
return None
return find_node_executable_on_path(command)
def with_hermes_node_path(env: dict[str, str] | None = None) -> dict[str, str]:

View file

@ -229,6 +229,49 @@ _nb_install_bundled_node() {
return 0
}
# ---------------------------------------------------------------------------
# Heal a broken Hermes-managed Node tree (partial upgrade / missing lib/)
# ---------------------------------------------------------------------------
_nb_managed_tool_broken() {
local tool="$1"
local probe
for probe in \
"$HERMES_HOME/node/bin/$tool" \
"$HERMES_HOME/node/${tool}.exe" \
"$HERMES_HOME/node/$tool"; do
if [ -x "$probe" ] || [ -f "$probe" ]; then
if ! "$probe" --version >/dev/null 2>&1; then
return 0
fi
fi
done
return 1
}
_nb_managed_node_needs_heal() {
local tool
for tool in node npm npx; do
if _nb_managed_tool_broken "$tool"; then
return 0
fi
done
return 1
}
# Redownload the pinned nodejs.org tarball when a managed tree exists but
# node/npm/npx fail a --version probe. No-op when the tree is healthy or
# absent. Used by hermes_constants.find_hermes_node_executable() and safe
# to call from install reruns.
heal_managed_node() {
[ -d "$HERMES_HOME/node" ] || return 1
if ! _nb_managed_node_needs_heal; then
return 0
fi
_nb_log "Hermes-managed Node is broken — redownloading to $HERMES_HOME/node/..."
_nb_install_bundled_node
}
# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------

View file

@ -14,6 +14,8 @@ from hermes_constants import (
find_node_executable_on_path,
get_default_hermes_root,
get_hermes_home,
heal_hermes_managed_node,
hermes_managed_node_tree_present,
iter_hermes_node_dirs,
is_container,
node_tool_runnable,
@ -165,7 +167,7 @@ class TestHermesManagedNode:
assert find_node_executable("npm") == str(npm_cmd)
def test_windows_skips_broken_managed_npm_for_path_fallback(self, tmp_path, monkeypatch):
def test_windows_skips_broken_managed_npm_without_path_fallback(self, tmp_path, monkeypatch):
home = tmp_path / "hermes"
managed_npm = home / "node" / "npm.cmd"
managed_npm.parent.mkdir(parents=True)
@ -177,13 +179,17 @@ class TestHermesManagedNode:
monkeypatch.setattr(hermes_constants.sys, "platform", "win32")
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setenv("PATH", str(bin_dir))
monkeypatch.setattr(hermes_constants, "_managed_node_heal_attempted", False)
monkeypatch.setattr(hermes_constants, "heal_hermes_managed_node", lambda: False)
monkeypatch.setattr(
hermes_constants,
"node_tool_runnable",
lambda path: path == str(path_npm),
lambda path: False,
)
assert find_node_executable("npm") == str(path_npm)
assert hermes_managed_node_tree_present() is True
assert find_node_executable("npm") is None
assert find_node_executable("npm") != str(path_npm)
def test_with_hermes_node_path_prepends_existing_managed_dirs(self, tmp_path, monkeypatch):
home = tmp_path / "hermes"
@ -223,11 +229,42 @@ class TestNodeToolRunnable:
bad = self._stub(tmp_path, "npm", "#!/bin/sh\nexit 1\n")
assert node_tool_runnable(str(bad)) is False
def test_broken_managed_npm_falls_back_to_system_path(self, tmp_path, monkeypatch):
def test_broken_managed_npm_heals_when_node_still_runs(self, tmp_path, monkeypatch):
"""npm can fail while node --version still succeeds (missing lib/cli.js)."""
profile_home = tmp_path / "profiles" / "assistant"
managed_bin = profile_home / "node" / "bin"
managed_bin.mkdir(parents=True)
self._stub(managed_bin, "node", "#!/bin/sh\necho '22.0.0'\nexit 0\n")
broken_npm = self._stub(managed_bin, "npm", "#!/bin/sh\nexit 1\n")
heal_called = {"value": False}
system_bin = tmp_path / "system-bin"
system_bin.mkdir()
self._stub(system_bin, "npm", "#!/bin/sh\necho '11.10.0'\nexit 0\n")
monkeypatch.setenv("HERMES_HOME", str(profile_home))
monkeypatch.setenv("PATH", str(system_bin))
monkeypatch.setattr(hermes_constants, "_managed_node_heal_attempted", False)
def _heal():
heal_called["value"] = True
broken_npm.write_text("#!/bin/sh\necho '22.0.0'\nexit 0\n")
broken_npm.chmod(0o755)
return True
monkeypatch.setattr(hermes_constants, "heal_hermes_managed_node", _heal)
resolved = find_node_executable("npm")
assert heal_called["value"] is True
assert resolved == str(broken_npm)
assert resolved != str(system_bin / "npm")
def test_broken_managed_npm_heals_instead_of_path_fallback(self, tmp_path, monkeypatch):
profile_home = tmp_path / "profiles" / "assistant"
managed_bin = profile_home / "node" / "bin"
managed_bin.mkdir(parents=True)
broken_npm = self._stub(managed_bin, "npm", "#!/bin/sh\nexit 1\n")
healed_npm = self._stub(managed_bin, "npm", "#!/bin/sh\necho '22.0.0'\nexit 0\n")
system_bin = tmp_path / "system-bin"
system_bin.mkdir()
@ -235,10 +272,35 @@ class TestNodeToolRunnable:
monkeypatch.setenv("HERMES_HOME", str(profile_home))
monkeypatch.setenv("PATH", str(system_bin))
monkeypatch.setattr(hermes_constants, "_managed_node_heal_attempted", False)
assert find_hermes_node_executable("npm") is None
assert find_node_executable("npm") == str(good_npm)
assert find_node_executable("npm") != str(broken_npm)
def _heal():
broken_npm.write_text(healed_npm.read_text())
broken_npm.chmod(0o755)
return True
monkeypatch.setattr(hermes_constants, "heal_hermes_managed_node", _heal)
assert find_hermes_node_executable("npm") == str(healed_npm)
assert find_node_executable("npm") == str(healed_npm)
assert find_node_executable("npm") != str(good_npm)
def test_broken_managed_npm_returns_none_when_heal_fails(self, tmp_path, monkeypatch):
profile_home = tmp_path / "profiles" / "assistant"
managed_bin = profile_home / "node" / "bin"
managed_bin.mkdir(parents=True)
self._stub(managed_bin, "npm", "#!/bin/sh\nexit 1\n")
system_bin = tmp_path / "system-bin"
system_bin.mkdir()
self._stub(system_bin, "npm", "#!/bin/sh\necho '11.10.0'\nexit 0\n")
monkeypatch.setenv("HERMES_HOME", str(profile_home))
monkeypatch.setenv("PATH", str(system_bin))
monkeypatch.setattr(hermes_constants, "_managed_node_heal_attempted", False)
monkeypatch.setattr(hermes_constants, "heal_hermes_managed_node", lambda: False)
assert find_node_executable("npm") is None
def test_healthy_managed_npm_still_preferred(self, tmp_path, monkeypatch):
profile_home = tmp_path / "profiles" / "assistant"

View file

@ -53,3 +53,6 @@ def test_node_bootstrap_redirects_bundled_npm_global_prefix_to_link_dir() -> Non
ensure_node_body = text.split("ensure_node()", 1)[1]
assert "_nb_configure_npm_prefix" in ensure_node_body
assert '[ -x "$HERMES_HOME/node/bin/npm" ] || return 0' in text
assert "heal_managed_node()" in text
assert "_nb_managed_tool_broken" in text
assert "for tool in node npm npx" in text