mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-10 03:22:05 +00:00
guard kanban worker lifecycle by run id
This commit is contained in:
parent
f0d278412f
commit
56b4795115
5 changed files with 243 additions and 36 deletions
|
|
@ -1186,6 +1186,79 @@ def test_multiple_attempts_preserved_as_runs(kanban_home):
|
|||
conn.close()
|
||||
|
||||
|
||||
def test_stale_run_cannot_complete_new_attempt(kanban_home, monkeypatch):
    """Completion is honoured only when it carries the task's current run id.

    Scenario: claim the task, simulate a worker crash, then claim it again.
    A completion tagged with the first run's id must be refused and leave the
    retry untouched; a completion tagged with the second run's id must succeed.
    """
    import hermes_cli.kanban_db as _kb

    conn = kb.connect()
    try:
        task_id = kb.create_task(conn, title="retry guarded", assignee="worker")

        # Attempt 1: claim, then make the recorded worker PID look dead so the
        # crash detector picks the task up.
        kb.claim_task(conn, task_id)
        stale_run = kb.latest_run(conn, task_id)
        kb._set_worker_pid(conn, task_id, 98765)
        monkeypatch.setattr(_kb, "_pid_alive", lambda pid: False)
        crashed_ids = kb.detect_crashed_workers(conn)
        assert crashed_ids == [task_id]

        # Attempt 2: re-claiming must open a distinct run.
        kb.claim_task(conn, task_id)
        active_run = kb.latest_run(conn, task_id)
        assert active_run.id != stale_run.id

        # The stale attempt reports completion late -> rejected.
        stale_accepted = kb.complete_task(
            conn,
            task_id,
            summary="late stale completion",
            expected_run_id=stale_run.id,
        )
        assert not stale_accepted

        # The task must still be running under the retry's run.
        after_stale = kb.get_task(conn, task_id)
        assert after_stale.status == "running"
        assert after_stale.current_run_id == active_run.id

        # The active attempt completes -> accepted.
        current_accepted = kb.complete_task(
            conn,
            task_id,
            summary="current completion",
            expected_run_id=active_run.id,
        )
        assert current_accepted

        # Run history: first attempt crashed, second completed with its own summary.
        history = kb.list_runs(conn, task_id)
        outcomes = [run.outcome for run in history]
        assert outcomes == ["crashed", "completed"]
        assert history[-1].summary == "current completion"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_stale_run_cannot_block_or_heartbeat_new_attempt(kanban_home, monkeypatch):
    """Heartbeats and blocks from a superseded run must not touch the retry.

    Scenario: claim the task, simulate a worker crash, then claim it again.
    Heartbeat and block calls tagged with the first run's id must be refused
    and leave the task running with no heartbeat recorded; the same calls
    tagged with the second run's id must succeed and block the task.
    """
    import hermes_cli.kanban_db as _kb

    conn = kb.connect()
    try:
        task_id = kb.create_task(
            conn, title="retry heartbeat guarded", assignee="worker"
        )

        # Attempt 1: claim, then make the recorded worker PID look dead so the
        # crash detector picks the task up.
        kb.claim_task(conn, task_id)
        stale_run = kb.latest_run(conn, task_id)
        kb._set_worker_pid(conn, task_id, 98765)
        monkeypatch.setattr(_kb, "_pid_alive", lambda pid: False)
        crashed_ids = kb.detect_crashed_workers(conn)
        assert crashed_ids == [task_id]

        # Attempt 2: re-claiming must open a distinct run.
        kb.claim_task(conn, task_id)
        active_run = kb.latest_run(conn, task_id)
        assert active_run.id != stale_run.id

        # Lifecycle calls carrying the old run id are refused...
        stale_beat = kb.heartbeat_worker(
            conn, task_id, note="late", expected_run_id=stale_run.id
        )
        assert not stale_beat
        stale_block = kb.block_task(
            conn, task_id, reason="late block", expected_run_id=stale_run.id
        )
        assert not stale_block

        # ...and leave no trace on the task.
        after_stale = kb.get_task(conn, task_id)
        assert after_stale.status == "running"
        assert after_stale.current_run_id == active_run.id
        assert after_stale.last_heartbeat_at is None

        # The same calls carrying the active run id are accepted.
        current_beat = kb.heartbeat_worker(
            conn, task_id, note="current", expected_run_id=active_run.id
        )
        assert current_beat
        current_block = kb.block_task(
            conn, task_id, reason="current block", expected_run_id=active_run.id
        )
        assert current_block

        final_status = kb.get_task(conn, task_id).status
        assert final_status == "blocked"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_run_on_block_with_reason(kanban_home):
|
||||
conn = kb.connect()
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -611,6 +611,44 @@ def test_worker_complete_own_task_still_works(worker_env):
|
|||
assert d.get("ok") is True and d.get("task_id") == worker_env
|
||||
|
||||
|
||||
def test_worker_complete_rejects_stale_run_id(worker_env, monkeypatch):
    """A retried worker cannot complete the task using an old run token.

    Scenario: the task named by ``worker_env`` has its worker marked as
    crashed and is claimed again, yielding a second run. The completion
    handler is then invoked twice, with ``HERMES_KANBAN_RUN_ID`` set first to
    the superseded run id (must not report ``ok``) and then to the active run
    id (must report ``ok``).
    """
    # One alias is enough: the same module object is both called and patched.
    from hermes_cli import kanban_db as kb

    conn = kb.connect()
    try:
        run1 = kb.latest_run(conn, worker_env)

        # Make the recorded worker PID look dead so crash detection fires.
        kb._set_worker_pid(conn, worker_env, 98765)
        monkeypatch.setattr(kb, "_pid_alive", lambda pid: False)
        assert kb.detect_crashed_workers(conn) == [worker_env]

        # Re-claiming must open a distinct run.
        kb.claim_task(conn, worker_env)
        run2 = kb.latest_run(conn, worker_env)
        assert run2.id != run1.id
    finally:
        conn.close()

    from tools import kanban_tools as kt

    # Stale worker: its environment still carries the first run's id.
    monkeypatch.setenv("HERMES_KANBAN_RUN_ID", str(run1.id))
    out = kt._handle_complete({"summary": "late stale completion"})
    d = json.loads(out)
    assert d.get("ok") is not True

    # The rejected call must leave the retry running under the second run.
    conn = kb.connect()
    try:
        task = kb.get_task(conn, worker_env)
        assert task.status == "running"
        assert task.current_run_id == run2.id
    finally:
        conn.close()

    # Current worker: same handler, active run id -> accepted.
    monkeypatch.setenv("HERMES_KANBAN_RUN_ID", str(run2.id))
    out = kt._handle_complete({"summary": "current completion"})
    d = json.loads(out)
    assert d.get("ok") is True
|
||||
|
||||
|
||||
def test_orchestrator_complete_any_task_allowed(monkeypatch, tmp_path):
|
||||
"""Orchestrator profiles (no HERMES_KANBAN_TASK) can still complete
|
||||
any task via explicit task_id. The check only applies to workers."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue