diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 4bd4827e386..bad382c339e 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -3067,6 +3067,10 @@ DEFAULT_SPAWN_FAILURE_LIMIT = DEFAULT_FAILURE_LIMIT # and rotates on spawn if the file is larger than this at spawn time. DEFAULT_LOG_ROTATE_BYTES = 2 * 1024 * 1024 # 2 MiB +# Keep a little wall-clock budget for the worker to observe a terminal timeout +# and call kanban_block/kanban_complete before max_runtime_seconds kills it. +KANBAN_TERMINAL_TIMEOUT_GRACE_SECONDS = 30 + @dataclass class DispatchResult: @@ -4077,6 +4081,36 @@ def _resolve_hermes_argv() -> list[str]: return [sys.executable, "-m", "hermes_cli.main"] +def _worker_terminal_timeout_env( + max_runtime_seconds: Optional[int], + current_timeout: Optional[str], +) -> Optional[str]: + """Return a worker-scoped TERMINAL_TIMEOUT override, if needed. + + Kanban's ``max_runtime_seconds`` bounds the whole worker attempt. The + terminal tool has its own default timeout via ``TERMINAL_TIMEOUT``; when + the worker runtime is longer, raise only the child process default so a + long command is not killed by the generic terminal default first. + """ + if max_runtime_seconds is None: + return None + try: + runtime = int(max_runtime_seconds) + except (TypeError, ValueError): + return None + if runtime <= 0: + return None + + desired = max(1, runtime - KANBAN_TERMINAL_TIMEOUT_GRACE_SECONDS) + try: + existing = int(str(current_timeout).strip()) if current_timeout else 0 + except (TypeError, ValueError): + existing = 0 + if existing >= desired: + return None + return str(desired) + + def _default_spawn( task: Task, workspace: str, @@ -4132,6 +4166,18 @@ def _default_spawn( env["HERMES_KANBAN_RUN_ID"] = str(task.current_run_id) if task.claim_lock: env["HERMES_KANBAN_CLAIM_LOCK"] = task.claim_lock + terminal_timeout = _worker_terminal_timeout_env( + task.max_runtime_seconds, + env.get("TERMINAL_TIMEOUT"), + ) + if terminal_timeout is not None: + env["TERMINAL_TIMEOUT"] = terminal_timeout + foreground_timeout = _worker_terminal_timeout_env( + task.max_runtime_seconds, + env.get("TERMINAL_MAX_FOREGROUND_TIMEOUT"), + ) + if foreground_timeout is not None: + env["TERMINAL_MAX_FOREGROUND_TIMEOUT"] = foreground_timeout # Pin the shared board + workspaces root the dispatcher resolved, so # that even when the worker activates a profile (`hermes -p ` # rewrites HERMES_HOME), its kanban paths still match the @@ -4322,6 +4368,15 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str: if task.tenant: lines.append(f"Tenant: {task.tenant}") lines.append(f"Workspace: {task.workspace_kind} @ {task.workspace_path or '(unresolved)'}") + if task.max_runtime_seconds is not None: + terminal_timeout = _worker_terminal_timeout_env( + task.max_runtime_seconds, + os.environ.get("TERMINAL_TIMEOUT"), + ) + effective_terminal_timeout = terminal_timeout or os.environ.get("TERMINAL_TIMEOUT") + lines.append(f"Max runtime: {task.max_runtime_seconds}s") + if effective_terminal_timeout: + lines.append(f"Terminal timeout: {effective_terminal_timeout}s") lines.append("") if task.body and task.body.strip(): diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 35dc7ace951..879a74dee51 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -2679,6 +2679,124 @@ def test_default_spawn_auto_loads_kanban_worker_skill(kanban_home, monkeypatch): assert env.get("HERMES_PROFILE") == "some-profile" +def test_default_spawn_raises_terminal_timeout_to_task_runtime(kanban_home, monkeypatch): + """A task runtime cap should raise the worker's terminal default. + + This is worker-scoped env only: normal CLI/gateway terminal settings stay + untouched, but long kanban tasks no longer inherit a short generic + TERMINAL_TIMEOUT that kills their foreground command first. + """ + captured = {} + + class FakeProc: + pid = 123 + + def fake_popen(cmd, **kwargs): + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr("subprocess.Popen", fake_popen) + monkeypatch.setenv("TERMINAL_TIMEOUT", "180") + monkeypatch.delenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", raising=False) + + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="long worker", + assignee="ops", + max_runtime_seconds=3600, + ) + task = kb.get_task(conn, tid) + workspace = kb.resolve_workspace(task) + kb._default_spawn(task, str(workspace)) + finally: + conn.close() + + assert captured["env"]["TERMINAL_TIMEOUT"] == "3570" + assert captured["env"]["TERMINAL_MAX_FOREGROUND_TIMEOUT"] == "3570" + assert os.environ["TERMINAL_TIMEOUT"] == "180" + + +def test_default_spawn_preserves_longer_terminal_timeout(kanban_home, monkeypatch): + """Kanban should never lower an explicitly larger terminal timeout.""" + captured = {} + + class FakeProc: + pid = 124 + + def fake_popen(cmd, **kwargs): + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr("subprocess.Popen", fake_popen) + monkeypatch.setenv("TERMINAL_TIMEOUT", "7200") + monkeypatch.setenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", "7200") + + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="already tuned", + assignee="ops", + max_runtime_seconds=3600, + ) + task = kb.get_task(conn, tid) + workspace = kb.resolve_workspace(task) + kb._default_spawn(task, str(workspace)) + finally: + conn.close() + + assert captured["env"]["TERMINAL_TIMEOUT"] == "7200" + assert captured["env"]["TERMINAL_MAX_FOREGROUND_TIMEOUT"] == "7200" + + +def test_default_spawn_leaves_terminal_timeout_without_runtime_cap(kanban_home, monkeypatch): + """Uncapped tasks keep the existing terminal timeout behavior.""" + captured = {} + + class FakeProc: + pid = 125 + + def fake_popen(cmd, **kwargs): + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr("subprocess.Popen", fake_popen) + monkeypatch.setenv("TERMINAL_TIMEOUT", "180") + monkeypatch.delenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", raising=False) + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="uncapped", assignee="ops") + task = kb.get_task(conn, tid) + workspace = kb.resolve_workspace(task) + kb._default_spawn(task, str(workspace)) + finally: + conn.close() + + assert captured["env"]["TERMINAL_TIMEOUT"] == "180" + assert "TERMINAL_MAX_FOREGROUND_TIMEOUT" not in captured["env"] + + +def test_build_worker_context_includes_runtime_timeout_budget(kanban_home, monkeypatch): + monkeypatch.setenv("TERMINAL_TIMEOUT", "180") + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="long context", + assignee="ops", + max_runtime_seconds=3600, + ) + ctx = kb.build_worker_context(conn, tid) + finally: + conn.close() + + assert "Max runtime: 3600s" in ctx + assert "Terminal timeout: 3570s" in ctx + + # --------------------------------------------------------------------------- # Per-task force-loaded skills