mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Multiple gaps in activity tracking could cause the gateway's inactivity timeout to fire while the agent is actively working: 1. Streaming wait loop had no periodic heartbeat — the outer thread only touched activity when the stale-stream detector fired (180-300s), and for local providers (Ollama) the stale timeout was infinity, meaning zero heartbeats. Now touches activity every 30s. 2. Concurrent tool execution never set the activity callback on worker threads (threading.local invisible across threads) and never set _current_tool. Workers now set the callback, and the concurrent wait uses a polling loop with 30s heartbeats. 3. Modal backend's execute() override had its own polling loop without any activity callback. Now matches _wait_for_process cadence (10s).
206 lines
6.8 KiB
Python
206 lines
6.8 KiB
Python
"""Shared Hermes-side execution flow for Modal transports.
|
|
|
|
This module deliberately stops at the Hermes boundary:
|
|
- command preparation
|
|
- cwd/timeout normalization
|
|
- stdin/sudo shell wrapping
|
|
- common result shape
|
|
- interrupt/cancel polling
|
|
|
|
Direct Modal and managed Modal keep separate transport logic, persistence, and
|
|
trust-boundary decisions in their own modules.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import shlex
|
|
import time
|
|
import uuid
|
|
from abc import abstractmethod
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
from tools.environments.base import BaseEnvironment
|
|
from tools.interrupt import is_interrupted
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class PreparedModalExec:
|
|
"""Normalized command data passed to a transport-specific exec runner."""
|
|
|
|
command: str
|
|
cwd: str
|
|
timeout: int
|
|
stdin_data: str | None = None
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class ModalExecStart:
|
|
"""Transport response after starting an exec."""
|
|
|
|
handle: Any | None = None
|
|
immediate_result: dict | None = None
|
|
|
|
|
|
def wrap_modal_stdin_heredoc(command: str, stdin_data: str) -> str:
|
|
"""Append stdin as a shell heredoc for transports without stdin piping."""
|
|
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
|
while marker in stdin_data:
|
|
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
|
return f"{command} << '{marker}'\n{stdin_data}\n{marker}"
|
|
|
|
|
|
def wrap_modal_sudo_pipe(command: str, sudo_stdin: str) -> str:
|
|
"""Feed sudo via a shell pipe for transports without direct stdin piping."""
|
|
return f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {command}"
|
|
|
|
|
|
class BaseModalExecutionEnvironment(BaseEnvironment):
|
|
"""Execution flow for the *managed* Modal transport (gateway-owned sandbox).
|
|
|
|
This deliberately overrides :meth:`BaseEnvironment.execute` because the
|
|
tool-gateway handles command preparation, CWD tracking, and env-snapshot
|
|
management on the server side. The base class's ``_wrap_command`` /
|
|
``_wait_for_process`` / snapshot machinery does not apply here — the
|
|
gateway owns that responsibility. See ``ManagedModalEnvironment`` for the
|
|
concrete subclass.
|
|
"""
|
|
|
|
_stdin_mode = "payload"
|
|
_poll_interval_seconds = 0.25
|
|
_client_timeout_grace_seconds: float | None = None
|
|
_interrupt_output = "[Command interrupted]"
|
|
_unexpected_error_prefix = "Modal execution error"
|
|
|
|
def execute(
|
|
self,
|
|
command: str,
|
|
cwd: str = "",
|
|
*,
|
|
timeout: int | None = None,
|
|
stdin_data: str | None = None,
|
|
) -> dict:
|
|
self._before_execute()
|
|
prepared = self._prepare_modal_exec(
|
|
command,
|
|
cwd=cwd,
|
|
timeout=timeout,
|
|
stdin_data=stdin_data,
|
|
)
|
|
|
|
try:
|
|
start = self._start_modal_exec(prepared)
|
|
except Exception as exc:
|
|
return self._error_result(f"{self._unexpected_error_prefix}: {exc}")
|
|
|
|
if start.immediate_result is not None:
|
|
return start.immediate_result
|
|
|
|
if start.handle is None:
|
|
return self._error_result(
|
|
f"{self._unexpected_error_prefix}: transport did not return an exec handle"
|
|
)
|
|
|
|
deadline = None
|
|
if self._client_timeout_grace_seconds is not None:
|
|
deadline = time.monotonic() + prepared.timeout + self._client_timeout_grace_seconds
|
|
|
|
_last_activity_touch = time.monotonic()
|
|
_modal_exec_start = time.monotonic()
|
|
_ACTIVITY_INTERVAL = 10.0 # match _wait_for_process cadence
|
|
|
|
while True:
|
|
if is_interrupted():
|
|
try:
|
|
self._cancel_modal_exec(start.handle)
|
|
except Exception:
|
|
pass
|
|
return self._result(self._interrupt_output, 130)
|
|
|
|
try:
|
|
result = self._poll_modal_exec(start.handle)
|
|
except Exception as exc:
|
|
return self._error_result(f"{self._unexpected_error_prefix}: {exc}")
|
|
|
|
if result is not None:
|
|
return result
|
|
|
|
if deadline is not None and time.monotonic() >= deadline:
|
|
try:
|
|
self._cancel_modal_exec(start.handle)
|
|
except Exception:
|
|
pass
|
|
return self._timeout_result_for_modal(prepared.timeout)
|
|
|
|
# Periodic activity touch so the gateway knows we're alive
|
|
_now = time.monotonic()
|
|
if _now - _last_activity_touch >= _ACTIVITY_INTERVAL:
|
|
_last_activity_touch = _now
|
|
try:
|
|
from tools.environments.base import _get_activity_callback
|
|
_cb = _get_activity_callback()
|
|
except Exception:
|
|
_cb = None
|
|
if _cb:
|
|
try:
|
|
_elapsed = int(_now - _modal_exec_start)
|
|
_cb(f"modal command running ({_elapsed}s elapsed)")
|
|
except Exception:
|
|
pass
|
|
|
|
time.sleep(self._poll_interval_seconds)
|
|
|
|
def _before_execute(self) -> None:
|
|
"""Hook for backends that need pre-exec sync or validation."""
|
|
pass
|
|
|
|
def _prepare_modal_exec(
|
|
self,
|
|
command: str,
|
|
*,
|
|
cwd: str = "",
|
|
timeout: int | None = None,
|
|
stdin_data: str | None = None,
|
|
) -> PreparedModalExec:
|
|
effective_cwd = cwd or self.cwd
|
|
effective_timeout = timeout or self.timeout
|
|
|
|
exec_command = command
|
|
exec_stdin = stdin_data if self._stdin_mode == "payload" else None
|
|
if stdin_data is not None and self._stdin_mode == "heredoc":
|
|
exec_command = wrap_modal_stdin_heredoc(exec_command, stdin_data)
|
|
|
|
exec_command, sudo_stdin = self._prepare_command(exec_command)
|
|
if sudo_stdin is not None:
|
|
exec_command = wrap_modal_sudo_pipe(exec_command, sudo_stdin)
|
|
|
|
return PreparedModalExec(
|
|
command=exec_command,
|
|
cwd=effective_cwd,
|
|
timeout=effective_timeout,
|
|
stdin_data=exec_stdin,
|
|
)
|
|
|
|
def _result(self, output: str, returncode: int) -> dict:
|
|
return {
|
|
"output": output,
|
|
"returncode": returncode,
|
|
}
|
|
|
|
def _error_result(self, output: str) -> dict:
|
|
return self._result(output, 1)
|
|
|
|
def _timeout_result_for_modal(self, timeout: int) -> dict:
|
|
return self._result(f"Command timed out after {timeout}s", 124)
|
|
|
|
@abstractmethod
|
|
def _start_modal_exec(self, prepared: PreparedModalExec) -> ModalExecStart:
|
|
"""Begin a transport-specific exec."""
|
|
|
|
@abstractmethod
|
|
def _poll_modal_exec(self, handle: Any) -> dict | None:
|
|
"""Return a final result dict when complete, else ``None``."""
|
|
|
|
@abstractmethod
|
|
def _cancel_modal_exec(self, handle: Any) -> None:
|
|
"""Cancel or terminate the active transport exec."""
|