"""Tests for kanban worker/runs read endpoints. Covers: GET /workers/active GET /runs/{run_id} GET /runs/{run_id}/inspect POST /runs/{run_id}/terminate """ from __future__ import annotations import importlib.util import secrets import sys import time from pathlib import Path from unittest.mock import MagicMock import pytest from fastapi import FastAPI from fastapi.testclient import TestClient from hermes_cli import kanban_db as kb # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- def _load_plugin_router(): """Dynamically load plugins/kanban/dashboard/plugin_api.py and return its router.""" repo_root = Path(__file__).resolve().parents[2] plugin_file = repo_root / "plugins" / "kanban" / "dashboard" / "plugin_api.py" assert plugin_file.exists(), f"plugin file missing: {plugin_file}" mod_name = "hermes_dashboard_plugin_kanban_worker_runs_test" # Re-use a cached module if already loaded to avoid duplicate-router issues. if mod_name in sys.modules: return sys.modules[mod_name].router spec = importlib.util.spec_from_file_location(mod_name, plugin_file) assert spec is not None and spec.loader is not None mod = importlib.util.module_from_spec(spec) sys.modules[mod_name] = mod spec.loader.exec_module(mod) return mod.router @pytest.fixture def kanban_home(tmp_path, monkeypatch): """Isolated HERMES_HOME with an empty kanban DB.""" home = tmp_path / ".hermes" home.mkdir() monkeypatch.setenv("HERMES_HOME", str(home)) monkeypatch.setattr(Path, "home", lambda: tmp_path) kb.init_db() return home @pytest.fixture def client(kanban_home): app = FastAPI() app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban") return TestClient(app) def _insert_run(conn, task_id, *, worker_pid=None, ended_at=None): """Insert a task_runs row directly (bypassing claim machinery) and return run_id.""" lock = secrets.token_hex(8) future = int(time.time()) + 3600 cur = conn.execute( "INSERT INTO task_runs " "(task_id, status, claim_lock, claim_expires, worker_pid, started_at, ended_at) " "VALUES (?, 'running', ?, ?, ?, ?, ?)", (task_id, lock, future, worker_pid, int(time.time()), ended_at), ) conn.commit() return cur.lastrowid # --------------------------------------------------------------------------- # GET /workers/active # --------------------------------------------------------------------------- def test_workers_active_empty_board(client): """Board with no running tasks returns an empty workers list.""" r = client.get("/api/plugins/kanban/workers/active") assert r.status_code == 200 body = r.json() assert body["workers"] == [] assert body["count"] == 0 assert "checked_at" in body def test_workers_active_with_running_task(client): """A running task with an open run row and worker_pid appears in the list.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="active-worker", assignee="alice") conn.execute( "UPDATE tasks SET status='running' WHERE id=?", (task_id,), ) _insert_run(conn, task_id, worker_pid=12345) finally: conn.close() r = client.get("/api/plugins/kanban/workers/active") assert r.status_code == 200 body = r.json() assert body["count"] == 1 w = body["workers"][0] assert w["task_id"] == task_id assert w["worker_pid"] == 12345 assert w["task_status"] == "running" assert w["task_title"] == "active-worker" assert w["task_assignee"] == "alice" def test_workers_active_excludes_ended_runs(client): """Runs with ended_at set are excluded even if task is running.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="ended-run", assignee="bob") conn.execute("UPDATE tasks SET status='running' WHERE id=?", (task_id,)) _insert_run(conn, task_id, worker_pid=99999, ended_at=int(time.time()) - 60) finally: conn.close() r = client.get("/api/plugins/kanban/workers/active") assert r.status_code == 200 assert r.json()["count"] == 0 def test_workers_active_excludes_runs_without_pid(client): """Runs with no worker_pid are not considered active workers.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="no-pid", assignee="carol") conn.execute("UPDATE tasks SET status='running' WHERE id=?", (task_id,)) _insert_run(conn, task_id, worker_pid=None) finally: conn.close() r = client.get("/api/plugins/kanban/workers/active") assert r.status_code == 200 assert r.json()["count"] == 0 # --------------------------------------------------------------------------- # GET /runs/{run_id} # --------------------------------------------------------------------------- def test_get_run_404_unknown_id(client): """Non-existent run_id returns 404.""" r = client.get("/api/plugins/kanban/runs/999999") assert r.status_code == 404 assert "999999" in r.json()["detail"] def test_get_run_ok(client): """Existing run row returns 200 with expected shape.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="run-lookup", assignee="dave") run_id = _insert_run(conn, task_id, worker_pid=55555) finally: conn.close() r = client.get(f"/api/plugins/kanban/runs/{run_id}") assert r.status_code == 200 body = r.json() assert "run" in body run = body["run"] assert run["id"] == run_id assert run["task_id"] == task_id assert run["worker_pid"] == 55555 assert run["ended_at"] is None # --------------------------------------------------------------------------- # GET /runs/{run_id}/inspect # --------------------------------------------------------------------------- def test_inspect_run_404(client): """Non-existent run_id returns 404.""" r = client.get("/api/plugins/kanban/runs/888888/inspect") assert r.status_code == 404 def test_inspect_run_already_ended(client): """Run with ended_at set returns alive=false with reason.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="ended", assignee="eve") run_id = _insert_run(conn, task_id, worker_pid=11111, ended_at=int(time.time()) - 10) finally: conn.close() r = client.get(f"/api/plugins/kanban/runs/{run_id}/inspect") assert r.status_code == 200 body = r.json() assert body["alive"] is False assert "ended" in body["reason"] def test_inspect_run_no_pid(client): """Run with no worker_pid returns alive=false with reason.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="no-pid-inspect", assignee="frank") run_id = _insert_run(conn, task_id, worker_pid=None) finally: conn.close() r = client.get(f"/api/plugins/kanban/runs/{run_id}/inspect") assert r.status_code == 200 body = r.json() assert body["alive"] is False assert "worker_pid" in body["reason"] def test_inspect_run_dead_pid(client, monkeypatch): """Run with a non-existent PID returns alive=false via psutil.NoSuchProcess.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="dead-pid", assignee="grace") run_id = _insert_run(conn, task_id, worker_pid=999999) finally: conn.close() # Mock psutil to raise NoSuchProcess for any PID. mock_psutil = MagicMock() mock_psutil.NoSuchProcess = Exception mock_psutil.AccessDenied = PermissionError def _raise_no_such(*args, **kwargs): raise mock_psutil.NoSuchProcess("no such process") mock_psutil.Process = _raise_no_such # Patch the module-level _psutil in the loaded plugin module. plugin_mod_name = "hermes_dashboard_plugin_kanban_worker_runs_test" plugin_mod = sys.modules.get(plugin_mod_name) if plugin_mod is not None: monkeypatch.setattr(plugin_mod, "_psutil", mock_psutil) else: pytest.skip("plugin module not yet loaded") r = client.get(f"/api/plugins/kanban/runs/{run_id}/inspect") assert r.status_code == 200 body = r.json() assert body["alive"] is False assert body["pid"] == 999999 assert "not found" in body["reason"] def test_inspect_run_live_pid(client, monkeypatch): """Run with a live PID returns alive=true with psutil fields.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="live-pid", assignee="heidi") run_id = _insert_run(conn, task_id, worker_pid=12345) finally: conn.close() # Build a realistic mock psutil. mock_psutil = MagicMock() mock_psutil.NoSuchProcess = type("NoSuchProcess", (Exception,), {}) mock_psutil.AccessDenied = type("AccessDenied", (Exception,), {}) fake_mem = MagicMock() fake_mem.rss = 1024 * 1024 * 50 # 50 MB fake_mem.vms = 1024 * 1024 * 200 fake_proc = MagicMock() fake_proc.as_dict.return_value = { "cpu_percent": 3.5, "memory_info": fake_mem, "num_threads": 4, "status": "sleeping", "create_time": time.time() - 300, "cmdline": ["python", "-m", "hermes"], } fake_proc.num_fds.return_value = 12 mock_psutil.Process.return_value = fake_proc plugin_mod_name = "hermes_dashboard_plugin_kanban_worker_runs_test" plugin_mod = sys.modules.get(plugin_mod_name) if plugin_mod is not None: monkeypatch.setattr(plugin_mod, "_psutil", mock_psutil) else: pytest.skip("plugin module not yet loaded") r = client.get(f"/api/plugins/kanban/runs/{run_id}/inspect") assert r.status_code == 200 body = r.json() assert body["alive"] is True assert body["pid"] == 12345 assert body["cpu_percent"] == 3.5 assert body["memory_rss_bytes"] == fake_mem.rss assert body["num_threads"] == 4 assert body["status"] == "sleeping" # --------------------------------------------------------------------------- # POST /runs/{run_id}/terminate # --------------------------------------------------------------------------- def _setup_running_task_with_run(conn, *, title, assignee, worker_pid): """Create a task in 'running' state with a matching open task_runs row. Mirrors what dispatcher_claim does: stamps tasks.status='running', tasks.claim_lock, tasks.worker_pid; inserts task_runs row with the same claim_lock so reclaim_task's preconditions are satisfied. """ task_id = kb.create_task(conn, title=title, assignee=assignee) lock = secrets.token_hex(8) future = int(time.time()) + 3600 conn.execute( "UPDATE tasks SET status='running', claim_lock=?, " "claim_expires=?, worker_pid=? WHERE id=?", (lock, future, worker_pid, task_id), ) cur = conn.execute( "INSERT INTO task_runs " "(task_id, status, claim_lock, claim_expires, worker_pid, started_at) " "VALUES (?, 'running', ?, ?, ?, ?)", (task_id, lock, future, worker_pid, int(time.time())), ) conn.commit() return task_id, cur.lastrowid def test_terminate_run_404_unknown_id(client): """POST to unknown run_id returns 404.""" r = client.post( "/api/plugins/kanban/runs/777777/terminate", json={"reason": "test"}, ) assert r.status_code == 404 assert "777777" in r.json()["detail"] def test_terminate_run_409_already_ended(client): """POST against a run with ended_at set returns 409.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="ended-terminate", assignee="ivy") run_id = _insert_run( conn, task_id, worker_pid=22222, ended_at=int(time.time()) - 30, ) finally: conn.close() r = client.post( f"/api/plugins/kanban/runs/{run_id}/terminate", json={"reason": "too late"}, ) assert r.status_code == 409 assert "already ended" in r.json()["detail"] def test_terminate_run_ok(client, monkeypatch): """Happy path: live run is terminated, signal fn invoked, reason recorded.""" conn = kb.connect() try: task_id, run_id = _setup_running_task_with_run( conn, title="kill-me", assignee="jane", worker_pid=33333, ) finally: conn.close() # Capture signal calls so we don't actually SIGTERM a random PID. sent = [] def _fake_terminate(pid, prev_lock, *, signal_fn=None): sent.append((pid, prev_lock)) return {"signal": "SIGTERM", "delivered": True} monkeypatch.setattr(kb, "_terminate_reclaimed_worker", _fake_terminate) r = client.post( f"/api/plugins/kanban/runs/{run_id}/terminate", json={"reason": "operator abort"}, ) assert r.status_code == 200, r.text body = r.json() assert body == {"ok": True, "run_id": run_id, "task_id": task_id} assert sent == [(33333, sent[0][1])] assert sent[0][1] is not None # claim_lock was non-null # Task is back to ready, claim cleared. conn = kb.connect() try: row = conn.execute( "SELECT status, claim_lock, worker_pid FROM tasks WHERE id=?", (task_id,), ).fetchone() finally: conn.close() assert row["status"] == "ready" assert row["claim_lock"] is None assert row["worker_pid"] is None def test_terminate_run_409_task_not_reclaimable(client, monkeypatch): """Open run row whose task is no longer claimable returns 409.""" conn = kb.connect() try: task_id = kb.create_task(conn, title="ghost-run", assignee="ken") # Task left in default 'ready' state with no claim_lock — task_run # exists but reclaim_task will refuse because status != 'running' # and claim_lock is NULL. run_id = _insert_run(conn, task_id, worker_pid=44444) finally: conn.close() # Make sure no signal is ever sent on this code path. def _boom(*a, **k): raise AssertionError("_terminate_reclaimed_worker should not be called") monkeypatch.setattr(kb, "_terminate_reclaimed_worker", _boom) r = client.post( f"/api/plugins/kanban/runs/{run_id}/terminate", json={"reason": "stale"}, ) assert r.status_code == 409 assert "reclaimable" in r.json()["detail"] def test_terminate_run_accepts_empty_body(client): """Empty JSON body (no reason) is still accepted; falls through to 404.""" r = client.post( "/api/plugins/kanban/runs/666666/terminate", json={}, ) # 404 because run doesn't exist — what we're asserting here is that # the endpoint doesn't 422 on a missing 'reason' field. assert r.status_code == 404