fix(process): correct detached crash recovery state

Previously crash recovery recreated detached sessions as if they were
fully managed, so polls and kills could lie about liveness and the
checkpoint could forget recovered jobs after the next restart.
This commit refreshes recovered host-backed sessions from real PID
state, keeps checkpoint data durable, and preserves notify watcher
metadata while treating sandbox-only PIDs as non-recoverable.

- Persist `pid_scope` in `tools/process_registry.py` and skip
  recovering sandbox-backed entries without a host-visible PID handle
- Refresh detached sessions on access so `get`/`poll`/`wait` and active
  session queries observe exited processes instead of hanging forever
- Allow recovered host PIDs to be terminated honestly and requeue
  `notify_on_complete` watchers during checkpoint recovery
- Add regression tests for durable checkpoints, detached exit/kill
  behavior, sandbox skip logic, and recovered notify watchers
This commit is contained in:
mrshu 2026-04-08 08:59:52 +02:00 committed by Teknium
parent 383db35925
commit 19b0ddce40
3 changed files with 241 additions and 13 deletions

View file

@ -197,6 +197,26 @@ class TestCheckpointNotify:
s = registry.get("proc_live")
assert s.notify_on_complete is True
def test_recover_requeues_notify_watchers(self, registry, tmp_path):
checkpoint = tmp_path / "procs.json"
checkpoint.write_text(json.dumps([{
"session_id": "proc_live",
"command": "sleep 999",
"pid": os.getpid(),
"task_id": "t1",
"session_key": "sk1",
"watcher_platform": "telegram",
"watcher_chat_id": "123",
"watcher_thread_id": "42",
"watcher_interval": 5,
"notify_on_complete": True,
}]))
with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
recovered = registry.recover_from_checkpoint()
assert recovered == 1
assert len(registry.pending_watchers) == 1
assert registry.pending_watchers[0]["notify_on_complete"] is True
def test_recover_defaults_false(self, registry, tmp_path):
"""Old checkpoint entries without the field default to False."""
checkpoint = tmp_path / "procs.json"