mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
fix(kanban): release scratch workspace and tmux session on task completion
Salvages #27369 by @LeonJS. complete_task() now calls _cleanup_workspace() and _cleanup_worker_tmux() after marking a task complete. Scratch workspaces (used by swarm agents) accumulated on disk — hundreds of MB per task — and were never released. Stale tmux sessions from completed agents also persisted indefinitely. Both gates are safe:
- the workspace_kind == 'scratch' gate preserves user worktree/dir workspaces
- the tmux #{pane_dead} == 1 gate only kills sessions where the worker has already exited
- cleanup is best-effort: failures never block task completion
This commit is contained in:
parent
fb96208892
commit
9f008bcd5c
1 changed files with 66 additions and 0 deletions
|
|
@ -79,6 +79,7 @@ import sqlite3
|
|||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
|
@ -86,6 +87,8 @@ from typing import Any, Iterable, Optional
|
|||
|
||||
from toolsets import get_toolset_names
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
|
|
@ -2622,9 +2625,72 @@ def complete_task(
|
|||
_clear_failure_counter(conn, task_id)
|
||||
# Recompute ready status for dependents (separate txn so children see done).
|
||||
recompute_ready(conn)
|
||||
# Clean up the scratch workspace and any stale tmux session for the worker.
|
||||
_cleanup_workspace(conn, task_id)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace / tmux cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
    """Remove a task's scratch workspace dir and kill its stale tmux session.

    Called from :func:`complete_task` after the DB transaction commits.
    Best-effort: errors are logged at debug level and never propagate, so
    cleanup can never block task completion.

    The two cleanups are gated independently:

    * Only ``scratch`` workspaces are removed; ``worktree`` and ``dir``
      workspaces are intentionally preserved.
    * The tmux cleanup is attempted for every known task regardless of its
      workspace kind; :func:`_cleanup_worker_tmux` applies its own
      "pane is dead" gate before killing anything.

    Args:
        conn: Open connection to the kanban database. Rows must be
            addressable by column name (``row["workspace_kind"]``).
        task_id: Id of the task that has just been completed.
    """
    try:
        row = conn.execute(
            "SELECT workspace_kind, workspace_path FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if not row:
            # Unknown task: no workspace to remove, and the tmux lookup
            # (which reads the same row) would find nothing either.
            return
        kind: Optional[str] = row["workspace_kind"]
        path: Optional[str] = row["workspace_path"]
        if kind == "scratch" and path:
            import shutil

            wp = Path(path)
            if wp.is_dir():
                shutil.rmtree(wp, ignore_errors=True)
                # ignore_errors=True hides partial failures, so only claim
                # success when the directory is really gone.
                if wp.exists():
                    _log.debug(
                        "Could not fully remove scratch workspace: %s", wp
                    )
                else:
                    _log.debug("Removed scratch workspace: %s", wp)
    except Exception:
        # Best-effort — never block completion, but leave a trace.
        _log.debug(
            "Scratch workspace cleanup failed for task %s",
            task_id,
            exc_info=True,
        )

    # Also kill the tmux session for the worker that owned this task, if
    # that session is now dead (worker process exited). This deliberately
    # runs outside the scratch gate above so tasks using worktree/dir
    # workspaces release their sessions too. The helper handles its own
    # errors.
    _cleanup_worker_tmux(conn, task_id)
|
||||
|
||||
|
||||
def _cleanup_worker_tmux(conn: sqlite3.Connection, task_id: str) -> None:
    """Kill the tmux session associated with a task's assignee, if dead.

    The session name is derived from the task's ``assignee`` column as
    ``swarm-<assignee>``. The session is killed only when tmux reports
    every pane in it as dead (``#{pane_dead}`` == 1), i.e. the worker has
    already exited. A missing session, a missing ``tmux`` binary, or a
    timeout leaves everything untouched.

    Best-effort: errors are logged at debug level and never propagate.

    Args:
        conn: Open connection to the kanban database. Rows must be
            addressable by column name (``row["assignee"]``).
        task_id: Id of the task whose worker session should be released.
    """
    try:
        row = conn.execute(
            "SELECT assignee FROM tasks WHERE id = ?", (task_id,)
        ).fetchone()
        if not row or not row["assignee"]:
            return
        assignee: str = row["assignee"]
        # Workers named swarm1-12 use tmux sessions named swarm-swarm1 etc.
        session = f"swarm-{assignee}"
        # tmux resolves a bare "-t name" by prefix when no exact match
        # exists, so "swarm-swarm1" could silently resolve to
        # "swarm-swarm10". A leading "=" forces an exact session match.
        exact = f"={session}"

        # List the dead-flag of every pane in the session ("-s" covers all
        # windows, not just the active one). The trailing ":" marks the
        # target as a session so the "=" applies to the session name.
        probe = subprocess.run(
            [
                "tmux", "list-panes", "-s",
                "-t", f"{exact}:",
                "-F", "#{pane_dead}",
            ],
            capture_output=True, text=True, timeout=5,
        )
        if probe.returncode != 0:
            return  # no such session (or no tmux server) — nothing to do

        pane_flags = probe.stdout.split()
        # Kill only when there is at least one pane and all of them are
        # dead; a session with several dead panes prints "1" once per pane.
        if not pane_flags or any(flag != "1" for flag in pane_flags):
            return

        killed = subprocess.run(
            ["tmux", "kill-session", "-t", exact],
            capture_output=True, timeout=5,
        )
        if killed.returncode == 0:
            _log.debug("Killed stale tmux session: %s", session)
        else:
            _log.debug("Could not kill stale tmux session: %s", session)
    except Exception:
        # Best-effort — never block completion, but leave a trace.
        _log.debug(
            "tmux session cleanup failed for task %s",
            task_id,
            exc_info=True,
        )
|
||||
|
||||
|
||||
def edit_completed_task_result(
|
||||
conn: sqlite3.Connection,
|
||||
task_id: str,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue