mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-15 04:12:25 +00:00
Modal backend stubs
This commit is contained in:
parent
7130fa50cb
commit
ea7aa0b0d4
7 changed files with 383 additions and 56 deletions
27
atropos/backends/__init__.py
Normal file
27
atropos/backends/__init__.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .base import ToolBackend
|
||||
from .modal_backend import ModalBackendConfig, ModalToolBackend
|
||||
from .nomad_backend import NomadBackendConfig, NomadToolBackend
|
||||
|
||||
|
||||
def create_tool_backend(cfg: Any) -> ToolBackend:
    """Build the tool backend selected by ``cfg.tool_pool_mode``.

    The mode is read with ``getattr`` (falling back to ``"nomad"`` when the
    attribute is absent), converted to ``str``, stripped of surrounding
    whitespace and lower-cased before it is compared.

    Args:
        cfg: Configuration object. It is also handed to the chosen backend
            config's ``from_agent_env_config`` constructor.

    Returns:
        A ``NomadToolBackend`` for mode ``"nomad"`` or a ``ModalToolBackend``
        for mode ``"modal"``.

    Raises:
        ValueError: If the normalized mode is neither ``"nomad"`` nor
            ``"modal"``.
    """
    requested = getattr(cfg, "tool_pool_mode", "nomad")
    mode = str(requested).strip().lower()

    # Reject anything unrecognized up front so the two branches below are
    # the only remaining possibilities.
    if mode not in ("nomad", "modal"):
        raise ValueError(f"Unknown tool_pool_mode: {mode}")

    if mode == "modal":
        return ModalToolBackend(ModalBackendConfig.from_agent_env_config(cfg))
    return NomadToolBackend(NomadBackendConfig.from_agent_env_config(cfg))
|
||||
|
||||
|
||||
# Names re-exported by this package: the backend protocol, the factory
# function, and the config/backend classes imported above.
__all__ = [
    "ToolBackend",
    "create_tool_backend",
    "NomadBackendConfig",
    "NomadToolBackend",
    "ModalBackendConfig",
    "ModalToolBackend",
]
|
||||
|
||||
89
atropos/backends/base.py
Normal file
89
atropos/backends/base.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
"""
|
||||
Backend interfaces for AgentEnv tool execution.
|
||||
|
||||
The goal of this module is to decouple ToolExecutor / AgentEnv from any single
|
||||
execution backend (Nomad/Docker today; Modal later).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Protocol, Tuple
|
||||
|
||||
from ..slots.executor import ExecutionResult
|
||||
from ..slots.slot import Slot
|
||||
|
||||
|
||||
class ToolBackend(Protocol):
    """
    Minimal interface required by ToolExecutor.

    Backends provide:
    - lifecycle (start/stop)
    - slot acquisition/release (workspace affinity)
    - batched tool execution across slots
    - optional artifact helpers (for env verification / demos)

    The artifact helpers have default bodies that raise
    ``NotImplementedError``; a backend that explicitly subclasses this
    protocol inherits them and only needs to override the ones it supports.
    """

    @property
    def default_timeout_s(self) -> Optional[float]:
        """Default sandbox execution timeout in seconds (if any)."""

    async def start(self) -> None:
        """Start the backend (provision workers/containers, health checks, etc)."""

    async def stop(self, *, purge: bool = False) -> None:
        """Stop the backend and optionally purge remote resources."""

    async def acquire(self, trajectory_id: Optional[str] = None) -> Slot:
        """Acquire a slot for a trajectory (workspace affinity)."""

    async def release(self, slot: Slot, *, reset_workspace: bool = False) -> None:
        """Release a slot back to the pool."""

    async def execute_batch(
        self,
        requests: List[Tuple[Slot, str, Dict[str, Any]]],
        *,
        timeout_s: Optional[float] = None,
    ) -> List[ExecutionResult]:
        """Execute a batch of sandbox tool calls and return results in order.

        NOTE(review): each request is a ``(slot, str, dict)`` tuple --
        presumably the tool name and its arguments; confirm against the
        executor that consumes it.
        """

    # ---------------------------------------------------------------------
    # Optional artifact helpers (supported by the Nomad sandbox-server today)
    # ---------------------------------------------------------------------

    async def read_artifact(
        self,
        slot: Slot,
        path: str,
        *,
        encoding: str = "text",
        max_bytes: Optional[int] = None,
        include_sha256: bool = False,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Optional helper: read the artifact at ``path`` for ``slot``.

        The meaning of the keyword options is defined by the backend that
        overrides this method.

        Raises:
            NotImplementedError: Always, in this default implementation.
        """
        # Name the concrete backend and helper so the failure is diagnosable.
        raise NotImplementedError(
            f"{type(self).__name__} does not support read_artifact"
        )

    async def list_artifacts(
        self,
        slot: Slot,
        path: str = ".",
        *,
        recursive: bool = False,
        max_entries: Optional[int] = None,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Optional helper: list artifacts under ``path`` for ``slot``.

        The meaning of the keyword options is defined by the backend that
        overrides this method.

        Raises:
            NotImplementedError: Always, in this default implementation.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not support list_artifacts"
        )

    async def archive_artifacts(
        self,
        slot: Slot,
        path: str = ".",
        *,
        archive_format: str = "tar.gz",
        max_bytes: Optional[int] = None,
        max_entries: Optional[int] = None,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Optional helper: archive artifacts under ``path`` for ``slot``.

        The meaning of the keyword options is defined by the backend that
        overrides this method.

        Raises:
            NotImplementedError: Always, in this default implementation.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not support archive_artifacts"
        )
|
||||
|
||||
73
atropos/backends/modal_backend.py
Normal file
73
atropos/backends/modal_backend.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
"""
|
||||
Modal tool backend (stub).
|
||||
|
||||
We intentionally ship a placeholder implementation so AgentEnv can expose a
|
||||
backend switch without forcing Modal as a hard dependency for Hermes-Agent.
|
||||
|
||||
When org access is available, this backend will be implemented by running a
|
||||
long-lived Modal worker (or pool) that owns N slots and exposes `execute_batch`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ..slots.executor import ExecutionResult
|
||||
from ..slots.slot import Slot
|
||||
from .base import ToolBackend
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ModalBackendConfig:
    """Immutable settings for the Modal tool backend stub."""

    # Placeholders for future implementation.
    app_name: str = "atropos-sandbox"
    function_name: str = "sandbox_server"
    volume_name: Optional[str] = None
    volume_mount_path: str = "/data"

    @classmethod
    def from_agent_env_config(cls, cfg: Any) -> "ModalBackendConfig":
        """Build a config from the ``modal_*`` attributes of ``cfg``.

        Each field is read from ``cfg.modal_<field>``. An attribute that is
        missing, ``None`` or the empty string falls back to the dataclass
        default, so an unset optional field never becomes the literal string
        ``"None"``.

        Args:
            cfg: Any object; only ``modal_app_name``, ``modal_function_name``,
                ``modal_volume_name`` and ``modal_volume_mount_path`` are read.

        Returns:
            A new ``ModalBackendConfig``.
        """

        def _text(attr: str, default: str) -> str:
            # Treat None/"" as "not configured" rather than stringifying them.
            value = getattr(cfg, attr, None)
            if value is None or value == "":
                return default
            return str(value)

        # Any falsy volume name means "no volume"; otherwise coerce to str so
        # the field matches its Optional[str] annotation.
        volume = getattr(cfg, "modal_volume_name", None)
        return cls(
            app_name=_text("modal_app_name", cls.app_name),
            function_name=_text("modal_function_name", cls.function_name),
            volume_name=str(volume) if volume else None,
            volume_mount_path=_text("modal_volume_mount_path", cls.volume_mount_path),
        )
|
||||
|
||||
|
||||
class ModalToolBackend(ToolBackend):
    """Placeholder Modal backend.

    ``default_timeout_s`` returns ``None``; every lifecycle, slot and
    execution coroutine raises the ``RuntimeError`` built by
    ``_unavailable``.
    """

    def __init__(self, config: ModalBackendConfig):
        # Stored only; nothing in this stub reads the config.
        self.config = config

    @property
    def default_timeout_s(self) -> Optional[float]:
        """Return ``None``: the stub defines no default timeout."""
        return None

    def _unavailable(self) -> RuntimeError:
        """Build (without raising) the error shared by every stubbed method."""
        message = (
            "Modal tool backend is not implemented yet. "
            "Keep `--env.tool_pool_mode nomad` for now."
        )
        return RuntimeError(message)

    async def start(self) -> None:
        """Always raise the "not implemented" ``RuntimeError``."""
        raise self._unavailable()

    async def stop(self, *, purge: bool = False) -> None:  # noqa: ARG002
        """Always raise the "not implemented" ``RuntimeError``."""
        # If start() isn't implemented, stop() is also unavailable.
        raise self._unavailable()

    async def acquire(self, trajectory_id: Optional[str] = None) -> Slot:  # noqa: ARG002
        """Always raise the "not implemented" ``RuntimeError``."""
        raise self._unavailable()

    async def release(self, slot: Slot, *, reset_workspace: bool = False) -> None:  # noqa: ARG002
        """Always raise the "not implemented" ``RuntimeError``."""
        raise self._unavailable()

    async def execute_batch(
        self,
        requests: List[Tuple[Slot, str, Dict[str, Any]]],
        *,
        timeout_s: Optional[float] = None,  # noqa: ARG002
    ) -> List[ExecutionResult]:
        """Always raise the "not implemented" ``RuntimeError``."""
        raise self._unavailable()
|
||||
|
||||
148
atropos/backends/nomad_backend.py
Normal file
148
atropos/backends/nomad_backend.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""
|
||||
Nomad/Docker tool backend.
|
||||
|
||||
This backend is the current default for AgentEnv: it provisions a Nomad job
|
||||
running `sandbox_server.py` and multiplexes stateless slots inside each container.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ..slots import Slot, SlotPool, SlotPoolConfig
|
||||
from ..slots.executor import ExecutionResult
|
||||
from .base import ToolBackend
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class NomadBackendConfig:
    """Immutable settings passed to ``NomadToolBackend``."""

    nomad_address: str
    sandbox_job_id: str
    sandbox_image: str
    slots_per_container: int
    min_containers: int
    max_containers: int
    privileged: bool
    acquire_timeout_s: float
    purge_job_on_start: bool

    @classmethod
    def from_agent_env_config(cls, cfg: Any) -> "NomadBackendConfig":
        """Build a config by copying and coercing same-named attributes of ``cfg``.

        Every field except ``purge_job_on_start`` must exist on ``cfg``;
        ``purge_job_on_start`` defaults to ``False`` when absent.

        Args:
            cfg: Object exposing the attributes listed below.

        Returns:
            A new ``NomadBackendConfig`` with each value passed through the
            field's converter (``str``, ``int``, ``bool`` or ``float``).

        Raises:
            AttributeError: If a required attribute is missing from ``cfg``.
        """
        # (attribute name, converter) pairs, read in declaration order.
        required = (
            ("nomad_address", str),
            ("sandbox_job_id", str),
            ("sandbox_image", str),
            ("slots_per_container", int),
            ("min_containers", int),
            ("max_containers", int),
            ("privileged", bool),
            ("acquire_timeout_s", float),
        )
        values: Dict[str, Any] = {}
        for name, convert in required:
            values[name] = convert(getattr(cfg, name))

        # The only optional setting.
        values["purge_job_on_start"] = bool(getattr(cfg, "purge_job_on_start", False))
        return cls(**values)
|
||||
|
||||
|
||||
class NomadToolBackend(ToolBackend):
    """Tool backend that forwards every operation to a ``SlotPool``.

    Lifecycle, slot handling and batch execution go to ``self.pool``; the
    artifact helpers go to ``self.pool.executor``.
    """

    def __init__(self, config: NomadBackendConfig):
        """Store ``config`` and build the ``SlotPool`` from its fields."""
        self.config = config
        pool_config = SlotPoolConfig(
            nomad_address=config.nomad_address,
            job_id=config.sandbox_job_id,
            image=config.sandbox_image,
            slots_per_container=config.slots_per_container,
            min_containers=config.min_containers,
            max_containers=config.max_containers,
            privileged=config.privileged,
            acquire_timeout=config.acquire_timeout_s,
            purge_job_on_start=bool(config.purge_job_on_start),
        )
        self.pool = SlotPool(pool_config)

    @property
    def default_timeout_s(self) -> Optional[float]:
        """Return ``pool.executor.timeout.total`` as a float, or ``None``.

        ``None`` is returned when the executor has no ``timeout`` attribute,
        the timeout has no ``total`` (or it is ``None``), or the value cannot
        be converted with ``float``.
        """
        executor_timeout = getattr(self.pool.executor, "timeout", None)
        total = getattr(executor_timeout, "total", None)
        if total is None:
            return None
        try:
            return float(total)
        except Exception:
            # Best effort: an unconvertible value is reported as "no default".
            return None

    async def start(self) -> None:
        """Start the underlying pool."""
        await self.pool.start()

    async def stop(self, *, purge: bool = False) -> None:
        """Stop the underlying pool, forwarding ``purge`` as ``purge_job``."""
        await self.pool.stop(purge_job=purge)

    async def acquire(self, trajectory_id: Optional[str] = None) -> Slot:
        """Acquire a slot from the pool for ``trajectory_id``."""
        slot = await self.pool.acquire(trajectory_id)
        return slot

    async def release(self, slot: Slot, *, reset_workspace: bool = False) -> None:
        """Hand ``slot`` back to the pool, forwarding ``reset_workspace``."""
        await self.pool.release(slot, reset_workspace=reset_workspace)

    async def execute_batch(
        self,
        requests: List[Tuple[Slot, str, Dict[str, Any]]],
        *,
        timeout_s: Optional[float] = None,
    ) -> List[ExecutionResult]:
        """Run ``requests`` through the pool, passing ``timeout_s`` as ``timeout``."""
        results = await self.pool.execute_batch(requests, timeout=timeout_s)
        return results

    async def read_artifact(
        self,
        slot: Slot,
        path: str,
        *,
        encoding: str = "text",
        max_bytes: Optional[int] = None,
        include_sha256: bool = False,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Forward to the executor's ``read_artifact`` (``timeout_s`` -> ``timeout``)."""
        executor = self.pool.executor
        return await executor.read_artifact(
            slot,
            path,
            encoding=encoding,
            max_bytes=max_bytes,
            include_sha256=include_sha256,
            timeout=timeout_s,
        )

    async def list_artifacts(
        self,
        slot: Slot,
        path: str = ".",
        *,
        recursive: bool = False,
        max_entries: Optional[int] = None,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Forward to the executor's ``list_artifacts`` (``timeout_s`` -> ``timeout``)."""
        executor = self.pool.executor
        return await executor.list_artifacts(
            slot,
            path,
            recursive=recursive,
            max_entries=max_entries,
            timeout=timeout_s,
        )

    async def archive_artifacts(
        self,
        slot: Slot,
        path: str = ".",
        *,
        archive_format: str = "tar.gz",
        max_bytes: Optional[int] = None,
        max_entries: Optional[int] = None,
        timeout_s: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Forward to the executor's ``archive_artifacts`` (``timeout_s`` -> ``timeout``)."""
        executor = self.pool.executor
        return await executor.archive_artifacts(
            slot,
            path,
            archive_format=archive_format,
            max_bytes=max_bytes,
            max_entries=max_entries,
            timeout=timeout_s,
        )

    def get_stats(self) -> Dict[str, Any]:
        """Return whatever the pool's ``get_stats`` reports."""
        stats = self.pool.get_stats()
        return stats
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue