mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 02:01:47 +00:00
fix(ci): stabilize main test suite regressions (#17660)
* fix: stabilize main test suite regressions * test(agent): update MiniMax normalization expectation * test: stabilize remaining CI assertions * test: harden config helper monkeypatching * test: harden CI-only assertions * fix(agent): propagate fast streaming interrupts
This commit is contained in:
parent
e7beaaf184
commit
f73364b1c4
37 changed files with 450 additions and 127 deletions
|
|
@ -16,6 +16,7 @@ import signal
|
|||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -37,6 +38,58 @@ def _pgid_still_alive(pgid: int) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _process_group_snapshot(pgid: int) -> str:
|
||||
"""Return a process-table snapshot for diagnostics."""
|
||||
return subprocess.run(
|
||||
["ps", "-o", "pid,ppid,pgid,stat,cmd", "-g", str(pgid)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
).stdout.strip()
|
||||
|
||||
|
||||
def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
|
||||
"""Wait for a process group to disappear under loaded xdist hosts."""
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if not _pgid_still_alive(pgid):
|
||||
return True
|
||||
time.sleep(0.1)
|
||||
return not _pgid_still_alive(pgid)
|
||||
|
||||
|
||||
def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
|
||||
"""If the shell wrapper exits before cleanup, still kill its process group.
|
||||
|
||||
Without the cached pgid fallback, ``os.getpgid(proc.pid)`` raises for the
|
||||
dead wrapper and cleanup falls back to ``proc.kill()``, which cannot reach
|
||||
orphaned grandchildren still running in the original process group.
|
||||
"""
|
||||
env = object.__new__(LocalEnvironment)
|
||||
proc = SimpleNamespace(
|
||||
pid=12345,
|
||||
_hermes_pgid=67890,
|
||||
poll=lambda: 0,
|
||||
kill=lambda: None,
|
||||
)
|
||||
killpg_calls = []
|
||||
|
||||
def fake_getpgid(_pid):
|
||||
raise ProcessLookupError
|
||||
|
||||
def fake_killpg(pgid, sig):
|
||||
killpg_calls.append((pgid, sig))
|
||||
if sig == 0:
|
||||
raise ProcessLookupError
|
||||
|
||||
monkeypatch.setattr(os, "getpgid", fake_getpgid)
|
||||
monkeypatch.setattr(os, "killpg", fake_killpg)
|
||||
|
||||
env._kill_process(proc)
|
||||
|
||||
assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)]
|
||||
|
||||
|
||||
def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
|
||||
"""When KeyboardInterrupt arrives mid-poll, the subprocess group must be
|
||||
killed before the exception is re-raised."""
|
||||
|
|
@ -118,19 +171,15 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
|
|||
assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
|
||||
|
||||
# The critical assertion: the subprocess GROUP must be dead. Not
|
||||
# just the bash wrapper — the 'sleep 30' child too.
|
||||
# Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete.
|
||||
deadline = time.monotonic() + 3.0
|
||||
while time.monotonic() < deadline:
|
||||
if not _pgid_still_alive(pgid):
|
||||
break
|
||||
time.sleep(0.1)
|
||||
assert not _pgid_still_alive(pgid), (
|
||||
# just the bash wrapper — the 'sleep 30' child too. Under xdist load,
|
||||
# process-group disappearance can lag briefly after the worker exits,
|
||||
# especially if the process is already dying or waiting to be reaped.
|
||||
assert _wait_for_pgid_exit(pgid), (
|
||||
f"subprocess group {pgid} is STILL ALIVE after worker received "
|
||||
f"KeyboardInterrupt — orphan bug regressed. This is the "
|
||||
f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
|
||||
f"report. See tools/environments/base.py _wait_for_process "
|
||||
f"except-block."
|
||||
f"except-block.\n{_process_group_snapshot(pgid)}"
|
||||
)
|
||||
# And the worker should have observed the KeyboardInterrupt (i.e.
|
||||
# it re-raised cleanly, not silently swallowed).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue