Merge pull request #52044 from NousResearch/fix/install-venv-kill-venv-processes

fix(install): kill venv-resident gateway before recreating venv on Windows
This commit is contained in:
Jeffrey Quesnelle 2026-06-24 15:16:58 -04:00 committed by GitHub
commit b13e2fd694
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 77 additions and 4 deletions

View file

@ -1562,16 +1562,54 @@ function Install-Venv {
if (Test-Path "venv") {
Write-Info "Virtual environment already exists, recreating..."
# On Windows, native Python extensions (e.g. _bcrypt.pyd) are loaded as
# DLLs by any running hermes process. Windows denies deletion of loaded
# DLLs, so kill any hermes.exe tree before removing the venv.
# On Windows, native Python extensions (e.g. _bcrypt.pyd, tornado's
# speedups.pyd) are loaded as DLLs by any running hermes process.
# Windows denies deletion of loaded DLLs, so every process running out
# of this venv must be stopped before removing it -- otherwise
# Remove-Item fails with "Access to the path '...' is denied" and the
# whole install/update aborts at this stage.
if ($env:OS -eq "Windows_NT") {
$myPid = $PID
Write-Info "Stopping any running hermes processes before recreating venv..."
# The launcher CLI (hermes.exe) plus its child tree.
& taskkill /F /T /IM hermes.exe /FI "PID ne $myPid" 2>$null | Out-Null
# taskkill /IM hermes.exe is NOT enough: the gateway/agent that a
# scheduled task or watchdog autostarts runs as
# `pythonw.exe -m hermes_cli.main gateway run` straight out of
# venv\Scripts\, so its image name is python/pythonw, not hermes.exe.
# That process holds the venv's .pyd files open and re-triggers the
# access-denied failure. Stop anything whose executable lives under
# this venv, matched by path prefix so the image name does not matter
# and a global/system python outside the venv is never touched.
#
# The gateway autostart task registers with /RL LIMITED as the current
# user (see hermes_cli/gateway_windows.py), so the installer always
# runs at equal-or-higher integrity and can read its executable path.
# Get-CimInstance is used over Get-Process because it returns a null
# ExecutablePath for a process it cannot inspect (a different session)
# instead of throwing, so an unreadable process is skipped rather than
# aborting the whole sweep.
$venvPrefix = [System.IO.Path]::GetFullPath((Join-Path $InstallDir "venv")).TrimEnd('\') + '\'
try {
Get-CimInstance Win32_Process -ErrorAction Stop |
Where-Object { $_.ProcessId -ne $myPid -and $_.ExecutablePath -and $_.ExecutablePath.StartsWith($venvPrefix, [System.StringComparison]::OrdinalIgnoreCase) } |
ForEach-Object {
Write-Info " stopping PID $($_.ProcessId) ($($_.Name)) running from venv"
Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue
}
} catch {
Write-Warn "Could not enumerate venv processes: $($_.Exception.Message)"
}
Start-Sleep -Milliseconds 800
}
Remove-Item -Recurse -Force "venv"
Remove-Item -Recurse -Force "venv" -ErrorAction SilentlyContinue
# A killed process can take a moment to release its file handles, so a
# first Remove-Item may still hit a locked .pyd. Retry once after a short
# pause before giving up and letting the stage fail loudly.
if (Test-Path "venv") {
Start-Sleep -Seconds 2
Remove-Item -Recurse -Force "venv"
}
}
# uv creates the venv and pins the Python version in one step. uv emits

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"dana@added-value.co.il": "Danamove", # PR #46726 salvage (kill venv-resident pythonw gateway before recreating venv on Windows; #47036/#47557/#47910)
"145739220+wgu9@users.noreply.github.com": "wgu9", # PR #51468 salvage (WSL/no-systemd orphan gateway tracking, #51325)
"165020384+uperLu@users.noreply.github.com": "uperLu", # PR #50958 salvage (rename plugins/cron → plugins/cron_providers; #50872)
"277269729+yusekiotacode@users.noreply.github.com": "yusekiotacode", # PR #48706 salvage (anthropic OAuth login token endpoint → platform.claude.com; #45250/#49821)

View file

@ -141,3 +141,37 @@ def test_install_sh_clears_unmerged_index_before_stash_source_order() -> None:
idx_unmerged = text.index("ls-files --unmerged")
idx_stash = text.index("stash push --include-untracked")
assert idx_unmerged < idx_stash
def test_install_ps1_stops_venv_resident_processes_before_removing_venv() -> None:
"""The Windows venv-recreate path must stop every process running out of the
old venv before deleting it.
A gateway autostarted by a scheduled task runs as
``venv\\Scripts\\pythonw.exe -m hermes_cli.main gateway run`` image name
``pythonw``, not ``hermes.exe`` so the ``taskkill /IM hermes.exe`` guard
misses it, the loaded ``.pyd`` stays locked, and ``Remove-Item venv`` fails
mid-recursion (issues #47036/#47557/#47910). The recreate branch must also
sweep by venv path prefix, and that sweep must run before the delete.
"""
text = INSTALL_PS1.read_text()
# The hermes.exe tree-kill is preserved (kills spawned child processes too).
assert 'taskkill /F /T /IM hermes.exe' in text
# The venv path-prefix sweep exists. It must match by case-insensitive
# StartsWith, NOT PowerShell -like: a venv path containing wildcard
# metacharacters ('[', ']') — legal in a Windows user name — silently fails
# to match under -like, reintroducing the exact miss this fix closes.
idx_recreate = text.index("Virtual environment already exists, recreating")
idx_sweep = text.index("StartsWith($venvPrefix", idx_recreate)
assert "[System.StringComparison]::OrdinalIgnoreCase" in text[idx_sweep:idx_sweep + 200]
assert 'ExecutablePath -like "$venvRoot' not in text, (
"the -like wildcard match must not be used for venv path scoping"
)
# The process sweep must run before the venv is removed, or it is a no-op.
idx_remove = text.index('Remove-Item -Recurse -Force "venv"', idx_recreate)
assert idx_sweep < idx_remove, (
"venv-resident processes must be stopped before Remove-Item deletes the venv"
)