mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-11 03:31:55 +00:00
test: migrate stale os.kill monkeypatches to gateway.status._pid_exists
PR #21561 migrated liveness probes across 14 call sites from
`os.kill(pid, 0)` to `gateway.status._pid_exists` (psutil-first) so
the gateway doesn't send Ctrl+C to itself on Windows (bpo-14484). A handful of
tests still patched the old `os.kill` seam and either happened to pass
on POSIX (when PID 12345 incidentally wasn't alive on the CI worker) or
failed outright — on CI runs they surfaced as 7 flaky/stable failures.
Migrate each affected test to patch the correct seam:
- tests/tools/test_browser_orphan_reaper.py (5 tests)
Patch `gateway.status._pid_exists` instead of `os.kill`.
Rename test_permission_error_on_kill_check_skips to
test_alive_legacy_daemon_is_reaped — the old assertion was
"PermissionError on sig 0 → skip dir"; post-migration the
untracked-alive-daemon path always reaps the dir after SIGTERM
(best-effort semantics were preserved).
- tests/tools/test_windows_native_support.py (4 tests)
Replace tests that asserted `os.kill` seam behavior with tests
that exercise `ProcessRegistry._is_host_pid_alive` as a
delegator. Split out a new TestPidExistsOSErrorWidening class
that hits `gateway.status._pid_exists` directly via the POSIX
fallback branch (so the widening of Windows-style `OSError(WinError 87)`
and `PermissionError` is still covered on Linux CI).
- tests/tools/test_process_registry.py (1 test)
Mock `psutil.Process` + `_pid_exists` instead of `os.kill`
for the detached-session kill path.
- tests/tools/test_mcp_stability.py::test_kill_orphaned_uses_sigkill_when_available
SIGTERM → alive-check → SIGKILL flow now uses `_pid_exists`
for the middle step; assertion count drops from 3 to 2.
- tests/gateway/test_status.py::TestScopedLocks (2 tests)
`acquire_scoped_lock` consults `_pid_exists`; patch that
seam directly instead of trying to control the nested psutil
call via os.kill monkeypatch.
- tests/hermes_cli/test_gateway.py::test_stop_profile_gateway_keeps_pid_file_when_process_still_running
The stop loop sends one SIGTERM via os.kill, then polls 20x via
_pid_exists; instrument the two separately. The old assertion
`calls["kill"] == 21` is split into `kill == 1` + `alive_probes == 20`.
- tests/hermes_cli/test_auth_toctou_file_modes.py::test_shared_nous_store_writes_0o600_with_0o700_parent
Commit c34884ea2 switched the pytest seat-belt guard in
`_nous_shared_store_path()` from `Path.home() / ".hermes"`
to `get_default_hermes_root()`, which honors HERMES_HOME. The
test sets both HERMES_HOME and HERMES_SHARED_AUTH_DIR to
subpaths of the same tmp_path, so the override now resolves to
the very path the guard refuses. Rename the override
subdirectory so the two paths diverge: the guard passes and the test runs.
All 21 original CI failures and their local-flaky siblings now pass
(278 tests across the touched files, 0 failures).
This commit is contained in:
parent
291a158441
commit
f5ee780124
7 changed files with 160 additions and 80 deletions
|
|
@ -308,54 +308,106 @@ class TestSigkillFallback:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OSError widening on os.kill(pid, 0) probes
|
||||
# OSError widening on liveness probes
|
||||
#
|
||||
# Post-#21561, ``ProcessRegistry._is_host_pid_alive`` delegates to
|
||||
# ``gateway.status._pid_exists``, which is the cross-platform liveness
|
||||
# primitive (psutil-first, ctypes/os.kill fallback). The tests below assert
|
||||
# (a) the delegation is correct and (b) ``_pid_exists`` correctly widens
|
||||
# Windows' ``OSError(WinError 87)`` / ``PermissionError`` behavior on the
|
||||
# POSIX fallback branch.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestProcessRegistryOSErrorWidening:
|
||||
"""_is_host_pid_alive must treat Windows' OSError as 'not alive'."""
|
||||
"""_is_host_pid_alive delegates to gateway.status._pid_exists."""
|
||||
|
||||
def test_oserror_treated_as_not_alive(self, monkeypatch):
|
||||
"""_pid_exists → False propagates as _is_host_pid_alive → False."""
|
||||
from tools.process_registry import ProcessRegistry
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
# Simulate Windows' WinError 87 for an unknown PID
|
||||
raise OSError(22, "Invalid argument")
|
||||
|
||||
monkeypatch.setattr("tools.process_registry.os.kill", fake_kill)
|
||||
monkeypatch.setattr("gateway.status._pid_exists", lambda pid: False)
|
||||
assert ProcessRegistry._is_host_pid_alive(12345) is False
|
||||
|
||||
def test_permission_error_treated_as_not_alive(self, monkeypatch):
|
||||
"""Conservative: PermissionError also means 'not alive' (matches existing behavior)."""
|
||||
def test_permission_error_treated_as_alive(self, monkeypatch):
|
||||
"""PermissionError is encoded by _pid_exists as alive=True; propagates as-is.
|
||||
|
||||
This is a meaningful semantic change from the pre-#21561 version of
|
||||
this test (which asserted PermissionError → not-alive). The old
|
||||
``os.kill(pid, 0)``-based probe couldn't distinguish "gone" from
|
||||
"owned by another user" on some platforms, so it conservatively
|
||||
returned False. The new psutil-based probe CAN distinguish them via
|
||||
``OpenProcess + ERROR_ACCESS_DENIED`` on Windows / ``except
|
||||
PermissionError`` on POSIX, so alive=True is correct.
|
||||
"""
|
||||
from tools.process_registry import ProcessRegistry
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
raise PermissionError(1, "Operation not permitted")
|
||||
monkeypatch.setattr("gateway.status._pid_exists", lambda pid: True)
|
||||
assert ProcessRegistry._is_host_pid_alive(12345) is True
|
||||
|
||||
monkeypatch.setattr("tools.process_registry.os.kill", fake_kill)
|
||||
assert ProcessRegistry._is_host_pid_alive(12345) is False
|
||||
|
||||
def test_zero_or_none_pid_returns_false_without_calling_kill(self, monkeypatch):
|
||||
def test_zero_or_none_pid_returns_false_without_probing(self, monkeypatch):
|
||||
"""No wasted syscall on falsy pids."""
|
||||
from tools.process_registry import ProcessRegistry
|
||||
|
||||
kill_calls = []
|
||||
probes = []
|
||||
monkeypatch.setattr(
|
||||
"tools.process_registry.os.kill",
|
||||
lambda pid, sig: kill_calls.append(pid),
|
||||
"gateway.status._pid_exists",
|
||||
lambda pid: probes.append(pid) or True,
|
||||
)
|
||||
assert ProcessRegistry._is_host_pid_alive(None) is False
|
||||
assert ProcessRegistry._is_host_pid_alive(0) is False
|
||||
assert kill_calls == []
|
||||
assert probes == []
|
||||
|
||||
def test_alive_pid_returns_true(self, monkeypatch):
|
||||
from tools.process_registry import ProcessRegistry
|
||||
|
||||
# os.kill returning None (default) means "probe succeeded → pid alive"
|
||||
monkeypatch.setattr("tools.process_registry.os.kill", lambda pid, sig: None)
|
||||
monkeypatch.setattr("gateway.status._pid_exists", lambda pid: True)
|
||||
assert ProcessRegistry._is_host_pid_alive(os.getpid()) is True
|
||||
|
||||
|
||||
class TestPidExistsOSErrorWidening:
|
||||
"""gateway.status._pid_exists itself must widen Windows errors correctly.
|
||||
|
||||
The POSIX fallback branch (reached when psutil isn't importable) is the
|
||||
only path where Python raises ``OSError(WinError 87)`` on Windows for a
|
||||
gone PID instead of ``ProcessLookupError``. The function must catch the
|
||||
wider ``OSError`` to match POSIX semantics.
|
||||
"""
|
||||
|
||||
def test_oserror_gone_pid_returns_false(self, monkeypatch):
|
||||
"""Simulate Windows' OSError(WinError 87) for a gone PID via the POSIX fallback."""
|
||||
from gateway import status
|
||||
|
||||
# Force the psutil-first branch to miss so we exercise the fallback.
|
||||
monkeypatch.setitem(
|
||||
__import__("sys").modules, "psutil",
|
||||
type("P", (), {"pid_exists": staticmethod(lambda pid: (_ for _ in ()).throw(ImportError()))})()
|
||||
)
|
||||
monkeypatch.setattr(status, "_IS_WINDOWS", False)
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
raise OSError(22, "Invalid argument")
|
||||
|
||||
monkeypatch.setattr(status.os, "kill", fake_kill)
|
||||
assert status._pid_exists(12345) is False
|
||||
|
||||
def test_permission_error_returns_true(self, monkeypatch):
|
||||
"""POSIX fallback: PermissionError means alive (owned by another user)."""
|
||||
from gateway import status
|
||||
|
||||
monkeypatch.setitem(
|
||||
__import__("sys").modules, "psutil",
|
||||
type("P", (), {"pid_exists": staticmethod(lambda pid: (_ for _ in ()).throw(ImportError()))})()
|
||||
)
|
||||
monkeypatch.setattr(status, "_IS_WINDOWS", False)
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
raise PermissionError(1, "Operation not permitted")
|
||||
|
||||
monkeypatch.setattr(status.os, "kill", fake_kill)
|
||||
assert status._pid_exists(12345) is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# tzdata dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue