mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(kanban): align worker terminal timeout with task runtime
This commit is contained in:
parent
0292398604
commit
8831eb5c70
2 changed files with 173 additions and 0 deletions
|
|
@ -3067,6 +3067,10 @@ DEFAULT_SPAWN_FAILURE_LIMIT = DEFAULT_FAILURE_LIMIT
|
|||
# and rotates on spawn if the file is larger than this at spawn time.
|
||||
DEFAULT_LOG_ROTATE_BYTES = 2 * 1024 * 1024 # 2 MiB
|
||||
|
||||
# Keep a little wall-clock budget for the worker to observe a terminal timeout
|
||||
# and call kanban_block/kanban_complete before max_runtime_seconds kills it.
|
||||
KANBAN_TERMINAL_TIMEOUT_GRACE_SECONDS = 30
|
||||
|
||||
|
||||
@dataclass
|
||||
class DispatchResult:
|
||||
|
|
@ -4077,6 +4081,36 @@ def _resolve_hermes_argv() -> list[str]:
|
|||
return [sys.executable, "-m", "hermes_cli.main"]
|
||||
|
||||
|
||||
def _worker_terminal_timeout_env(
|
||||
max_runtime_seconds: Optional[int],
|
||||
current_timeout: Optional[str],
|
||||
) -> Optional[str]:
|
||||
"""Return a worker-scoped TERMINAL_TIMEOUT override, if needed.
|
||||
|
||||
Kanban's ``max_runtime_seconds`` bounds the whole worker attempt. The
|
||||
terminal tool has its own default timeout via ``TERMINAL_TIMEOUT``; when
|
||||
the worker runtime is longer, raise only the child process default so a
|
||||
long command is not killed by the generic terminal default first.
|
||||
"""
|
||||
if max_runtime_seconds is None:
|
||||
return None
|
||||
try:
|
||||
runtime = int(max_runtime_seconds)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if runtime <= 0:
|
||||
return None
|
||||
|
||||
desired = max(1, runtime - KANBAN_TERMINAL_TIMEOUT_GRACE_SECONDS)
|
||||
try:
|
||||
existing = int(str(current_timeout).strip()) if current_timeout else 0
|
||||
except (TypeError, ValueError):
|
||||
existing = 0
|
||||
if existing >= desired:
|
||||
return None
|
||||
return str(desired)
|
||||
|
||||
|
||||
def _default_spawn(
|
||||
task: Task,
|
||||
workspace: str,
|
||||
|
|
@ -4132,6 +4166,18 @@ def _default_spawn(
|
|||
env["HERMES_KANBAN_RUN_ID"] = str(task.current_run_id)
|
||||
if task.claim_lock:
|
||||
env["HERMES_KANBAN_CLAIM_LOCK"] = task.claim_lock
|
||||
terminal_timeout = _worker_terminal_timeout_env(
|
||||
task.max_runtime_seconds,
|
||||
env.get("TERMINAL_TIMEOUT"),
|
||||
)
|
||||
if terminal_timeout is not None:
|
||||
env["TERMINAL_TIMEOUT"] = terminal_timeout
|
||||
foreground_timeout = _worker_terminal_timeout_env(
|
||||
task.max_runtime_seconds,
|
||||
env.get("TERMINAL_MAX_FOREGROUND_TIMEOUT"),
|
||||
)
|
||||
if foreground_timeout is not None:
|
||||
env["TERMINAL_MAX_FOREGROUND_TIMEOUT"] = foreground_timeout
|
||||
# Pin the shared board + workspaces root the dispatcher resolved, so
|
||||
# that even when the worker activates a profile (`hermes -p <name>`
|
||||
# rewrites HERMES_HOME), its kanban paths still match the
|
||||
|
|
@ -4322,6 +4368,15 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
if task.tenant:
|
||||
lines.append(f"Tenant: {task.tenant}")
|
||||
lines.append(f"Workspace: {task.workspace_kind} @ {task.workspace_path or '(unresolved)'}")
|
||||
if task.max_runtime_seconds is not None:
|
||||
terminal_timeout = _worker_terminal_timeout_env(
|
||||
task.max_runtime_seconds,
|
||||
os.environ.get("TERMINAL_TIMEOUT"),
|
||||
)
|
||||
effective_terminal_timeout = terminal_timeout or os.environ.get("TERMINAL_TIMEOUT")
|
||||
lines.append(f"Max runtime: {task.max_runtime_seconds}s")
|
||||
if effective_terminal_timeout:
|
||||
lines.append(f"Terminal timeout: {effective_terminal_timeout}s")
|
||||
lines.append("")
|
||||
|
||||
if task.body and task.body.strip():
|
||||
|
|
|
|||
|
|
@ -2679,6 +2679,124 @@ def test_default_spawn_auto_loads_kanban_worker_skill(kanban_home, monkeypatch):
|
|||
assert env.get("HERMES_PROFILE") == "some-profile"
|
||||
|
||||
|
||||
def test_default_spawn_raises_terminal_timeout_to_task_runtime(kanban_home, monkeypatch):
|
||||
"""A task runtime cap should raise the worker's terminal default.
|
||||
|
||||
This is worker-scoped env only: normal CLI/gateway terminal settings stay
|
||||
untouched, but long kanban tasks no longer inherit a short generic
|
||||
TERMINAL_TIMEOUT that kills their foreground command first.
|
||||
"""
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
pid = 123
|
||||
|
||||
def fake_popen(cmd, **kwargs):
|
||||
captured["env"] = kwargs.get("env", {})
|
||||
return FakeProc()
|
||||
|
||||
monkeypatch.setattr("subprocess.Popen", fake_popen)
|
||||
monkeypatch.setenv("TERMINAL_TIMEOUT", "180")
|
||||
monkeypatch.delenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", raising=False)
|
||||
|
||||
conn = kb.connect()
|
||||
try:
|
||||
tid = kb.create_task(
|
||||
conn,
|
||||
title="long worker",
|
||||
assignee="ops",
|
||||
max_runtime_seconds=3600,
|
||||
)
|
||||
task = kb.get_task(conn, tid)
|
||||
workspace = kb.resolve_workspace(task)
|
||||
kb._default_spawn(task, str(workspace))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
assert captured["env"]["TERMINAL_TIMEOUT"] == "3570"
|
||||
assert captured["env"]["TERMINAL_MAX_FOREGROUND_TIMEOUT"] == "3570"
|
||||
assert os.environ["TERMINAL_TIMEOUT"] == "180"
|
||||
|
||||
|
||||
def test_default_spawn_preserves_longer_terminal_timeout(kanban_home, monkeypatch):
|
||||
"""Kanban should never lower an explicitly larger terminal timeout."""
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
pid = 124
|
||||
|
||||
def fake_popen(cmd, **kwargs):
|
||||
captured["env"] = kwargs.get("env", {})
|
||||
return FakeProc()
|
||||
|
||||
monkeypatch.setattr("subprocess.Popen", fake_popen)
|
||||
monkeypatch.setenv("TERMINAL_TIMEOUT", "7200")
|
||||
monkeypatch.setenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", "7200")
|
||||
|
||||
conn = kb.connect()
|
||||
try:
|
||||
tid = kb.create_task(
|
||||
conn,
|
||||
title="already tuned",
|
||||
assignee="ops",
|
||||
max_runtime_seconds=3600,
|
||||
)
|
||||
task = kb.get_task(conn, tid)
|
||||
workspace = kb.resolve_workspace(task)
|
||||
kb._default_spawn(task, str(workspace))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
assert captured["env"]["TERMINAL_TIMEOUT"] == "7200"
|
||||
assert captured["env"]["TERMINAL_MAX_FOREGROUND_TIMEOUT"] == "7200"
|
||||
|
||||
|
||||
def test_default_spawn_leaves_terminal_timeout_without_runtime_cap(kanban_home, monkeypatch):
|
||||
"""Uncapped tasks keep the existing terminal timeout behavior."""
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
pid = 125
|
||||
|
||||
def fake_popen(cmd, **kwargs):
|
||||
captured["env"] = kwargs.get("env", {})
|
||||
return FakeProc()
|
||||
|
||||
monkeypatch.setattr("subprocess.Popen", fake_popen)
|
||||
monkeypatch.setenv("TERMINAL_TIMEOUT", "180")
|
||||
monkeypatch.delenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", raising=False)
|
||||
|
||||
conn = kb.connect()
|
||||
try:
|
||||
tid = kb.create_task(conn, title="uncapped", assignee="ops")
|
||||
task = kb.get_task(conn, tid)
|
||||
workspace = kb.resolve_workspace(task)
|
||||
kb._default_spawn(task, str(workspace))
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
assert captured["env"]["TERMINAL_TIMEOUT"] == "180"
|
||||
assert "TERMINAL_MAX_FOREGROUND_TIMEOUT" not in captured["env"]
|
||||
|
||||
|
||||
def test_build_worker_context_includes_runtime_timeout_budget(kanban_home, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_TIMEOUT", "180")
|
||||
conn = kb.connect()
|
||||
try:
|
||||
tid = kb.create_task(
|
||||
conn,
|
||||
title="long context",
|
||||
assignee="ops",
|
||||
max_runtime_seconds=3600,
|
||||
)
|
||||
ctx = kb.build_worker_context(conn, tid)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
assert "Max runtime: 3600s" in ctx
|
||||
assert "Terminal timeout: 3570s" in ctx
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task force-loaded skills
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue