hermes-agent/tests/gateway/test_planned_stop_watcher.py
kshitijk4poor 66827f8947 chore: prune unused imports and duplicate import redefinitions
Remove unused imports (F401) and duplicate/shadowed import
redefinitions (F811) across the codebase using ruff's safe
autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so
  public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look
  unused in their defining module are kept with explicit # noqa:
  F401 (gateway/run.py load_dotenv; run_agent re-exports from
  agent.message_sanitization, agent.context_compressor,
  agent.retry_utils, agent.prompt_builder, agent.process_bootstrap,
  agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those
  can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is
  selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli,
  toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited
  module still resolves
2026-05-28 22:26:25 -07:00

265 lines
9.2 KiB
Python

"""Tests for the planned-stop marker watcher thread (gateway/run.py).
The watcher is the Windows-fallback path for the v0.13.0 session-resume
feature — on Windows ``asyncio.add_signal_handler`` raises
NotImplementedError, so the SIGTERM signal handler never runs and the
shutdown drain (which writes ``resume_pending=True``) is skipped. The
watcher closes this gap by polling for the planned-stop marker file
and translating its existence into the same shutdown-handler call a
real SIGTERM would have produced.
See issue #33778 for the original Windows session-loss bug report.
"""
import asyncio
import threading
import time
from unittest.mock import MagicMock
from gateway.run import _run_planned_stop_watcher
class _FakeRunner:
"""Stand-in for GatewayRunner — only exposes the two flags the watcher reads."""
def __init__(self, *, running: bool = True, draining: bool = False):
self._running = running
self._draining = draining
def _make_loop_capturing_calls():
"""Build a fake asyncio loop whose call_soon_threadsafe records its args."""
loop = MagicMock(spec=asyncio.AbstractEventLoop)
loop._captured = []
def fake_call_soon_threadsafe(fn, *args):
loop._captured.append((fn, args))
loop.call_soon_threadsafe = fake_call_soon_threadsafe
return loop
def test_watcher_fires_shutdown_when_marker_appears(tmp_path, monkeypatch):
    """A marker already on disk must make the watcher schedule the shutdown handler."""
    from gateway import status as gateway_status

    marker_file = tmp_path / ".gateway-planned-stop.json"
    # Redirect marker-path resolution so the watcher polls the temp directory.
    monkeypatch.setattr(
        gateway_status, "_get_planned_stop_marker_path", lambda: marker_file
    )
    # The marker has to exist before the thread begins polling.
    marker_file.write_text('{"target_pid": 1234}', encoding="utf-8")

    fake_loop = _make_loop_capturing_calls()
    handler = MagicMock(name="shutdown_signal_handler")
    halt = threading.Event()
    fake_runner = _FakeRunner(running=True, draining=False)

    thread = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(halt, fake_runner, fake_loop, handler),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    thread.start()
    thread.join(timeout=2.0)

    assert not thread.is_alive(), "Watcher should exit after firing"
    assert len(fake_loop._captured) == 1, (
        f"Expected exactly one shutdown invocation, got {fake_loop._captured}"
    )

    scheduled_fn, scheduled_args = fake_loop._captured[0]
    assert scheduled_fn is handler
    # ``None`` in the signal slot is the planned-stop sentinel.
    assert scheduled_args == (None,)
def test_watcher_does_not_fire_when_marker_absent(tmp_path, monkeypatch):
    """Without a marker file the watcher idles until ``stop_event`` is set."""
    from gateway import status as gateway_status

    # This path is intentionally never created on disk.
    missing_marker = tmp_path / ".gateway-planned-stop.json"
    monkeypatch.setattr(
        gateway_status, "_get_planned_stop_marker_path", lambda: missing_marker
    )

    fake_loop = _make_loop_capturing_calls()
    handler = MagicMock()
    halt = threading.Event()
    fake_runner = _FakeRunner(running=True, draining=False)

    thread = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(halt, fake_runner, fake_loop, handler),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    thread.start()
    time.sleep(0.3)  # room for several 0.05s polls
    halt.set()
    thread.join(timeout=2.0)

    assert not thread.is_alive()
    assert fake_loop._captured == [], (
        f"No marker present, but watcher fired shutdown: {fake_loop._captured}"
    )
    handler.assert_not_called()
def test_watcher_skips_when_runner_already_draining(tmp_path, monkeypatch):
    """If shutdown is already in progress, don't re-fire the handler.

    This prevents a race where the SIGTERM handler is mid-drain and the
    watcher would double-tap the shutdown path. We check ``_draining``
    so the watcher backs off once any shutdown is in flight.
    """
    marker = tmp_path / ".gateway-planned-stop.json"
    marker.write_text('{"target_pid": 1234}', encoding="utf-8")

    from gateway import status as status_mod

    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)

    # Running AND draining: ``_draining`` is the only flag that can hold the
    # watcher back in this state. With ``running=False`` the test would be
    # indistinguishable from test_watcher_skips_when_runner_not_started and
    # would pass without the draining check ever being exercised.
    runner = _FakeRunner(running=True, draining=True)
    loop = _make_loop_capturing_calls()
    shutdown_handler = MagicMock()
    stop_event = threading.Event()

    watcher = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(stop_event, runner, loop, shutdown_handler),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    watcher.start()
    time.sleep(0.2)  # give the watcher several polls in which it could misfire
    stop_event.set()
    watcher.join(timeout=2.0)

    # A watcher that ignores stop_event would otherwise pass silently.
    assert not watcher.is_alive(), "Watcher should exit once stop_event is set"
    assert loop._captured == [], "Watcher fired while runner was already draining"
    # The handler must not be invoked directly either, bypassing the loop.
    shutdown_handler.assert_not_called()
def test_watcher_skips_when_runner_not_started(tmp_path, monkeypatch):
    """If the runner hasn't started, the marker is for a previous instance —
    we shouldn't shutdown a not-yet-running gateway.
    """
    marker = tmp_path / ".gateway-planned-stop.json"
    marker.write_text('{"target_pid": 9999}', encoding="utf-8")

    from gateway import status as status_mod

    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)

    # Marker present, but the runner is neither running nor draining.
    runner = _FakeRunner(running=False, draining=False)
    loop = _make_loop_capturing_calls()
    shutdown_handler = MagicMock()
    stop_event = threading.Event()

    watcher = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(stop_event, runner, loop, shutdown_handler),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    watcher.start()
    time.sleep(0.2)  # give the watcher several polls in which it could misfire
    stop_event.set()
    watcher.join(timeout=2.0)

    # Match the sibling tests: a watcher stuck in its loop must fail the test
    # rather than linger as a leaked daemon thread.
    assert not watcher.is_alive(), "Watcher should exit once stop_event is set"
    assert loop._captured == [], "Watcher fired before runner was running"
    # The handler must not be invoked directly either, bypassing the loop.
    shutdown_handler.assert_not_called()
def test_watcher_responds_to_stop_event_promptly(tmp_path, monkeypatch):
    """Once ``stop_event`` is set the watcher has to exit within roughly one poll."""
    from gateway import status as gateway_status

    absent_marker = tmp_path / ".gateway-planned-stop.json"
    monkeypatch.setattr(
        gateway_status, "_get_planned_stop_marker_path", lambda: absent_marker
    )

    halt = threading.Event()
    fake_runner = _FakeRunner(running=True, draining=False)
    fake_loop = _make_loop_capturing_calls()

    thread = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(halt, fake_runner, fake_loop, MagicMock()),
        kwargs={"poll_interval": 0.1},
        daemon=True,
    )
    thread.start()
    time.sleep(0.05)

    # Time only the span between requesting the stop and the thread ending.
    stop_requested_at = time.monotonic()
    halt.set()
    thread.join(timeout=2.0)
    elapsed = time.monotonic() - stop_requested_at

    assert not thread.is_alive()
    assert elapsed < 0.5, f"Watcher took {elapsed:.2f}s to honour stop_event"
def test_watcher_fires_only_once_when_marker_persists(tmp_path, monkeypatch):
    """A marker that stays on disk across polls must trigger exactly one call.

    The watcher is expected to fire once and leave its loop; consuming the
    marker is the shutdown handler's job. Re-firing on every tick would
    invoke the handler many times before the gateway actually stops.
    """
    from gateway import status as gateway_status

    lingering_marker = tmp_path / ".gateway-planned-stop.json"
    lingering_marker.write_text('{"target_pid": 1234}', encoding="utf-8")
    monkeypatch.setattr(
        gateway_status, "_get_planned_stop_marker_path", lambda: lingering_marker
    )

    halt = threading.Event()
    fake_runner = _FakeRunner(running=True, draining=False)
    fake_loop = _make_loop_capturing_calls()

    thread = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(halt, fake_runner, fake_loop, MagicMock()),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    thread.start()
    # The timeout spans many poll intervals, yet the thread should be gone
    # right after its first fire.
    thread.join(timeout=1.0)

    assert not thread.is_alive()
    captured = fake_loop._captured
    assert len(captured) == 1, (
        f"Watcher fired {len(captured)} times; should fire once "
        f"and exit (events={captured})"
    )
def test_watcher_tolerates_marker_path_resolution_errors(tmp_path, monkeypatch, caplog):
    """A raising marker-path resolver must not stop the watcher honouring stop_event."""
    from gateway import status as gateway_status

    resolver_calls = 0

    def explode():
        # The first resolution succeeds (original author's note: that is the
        # call made at thread start, outside the loop); every later one fails
        # like a broken filesystem would.
        nonlocal resolver_calls
        resolver_calls += 1
        if resolver_calls > 1:
            raise OSError("filesystem failed")
        return tmp_path / "nonexistent"

    monkeypatch.setattr(gateway_status, "_get_planned_stop_marker_path", explode)

    halt = threading.Event()
    fake_runner = _FakeRunner(running=True, draining=False)
    fake_loop = _make_loop_capturing_calls()

    thread = threading.Thread(
        target=_run_planned_stop_watcher,
        args=(halt, fake_runner, fake_loop, MagicMock()),
        kwargs={"poll_interval": 0.05},
        daemon=True,
    )
    thread.start()
    time.sleep(0.2)
    halt.set()
    thread.join(timeout=2.0)

    assert not thread.is_alive(), "Watcher should still honour stop_event after errors"
    # The marker never reported existence, so nothing may have been scheduled.
    assert fake_loop._captured == []