mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: Modal sandbox eval infra (9 fixes for TBLite baseline)
Fixes discovered while running the TBLite baseline evaluation:
1. The ephemeral_disk parameter is not supported in modal 1.3.5 - check for it before passing it.
2. Modal's legacy image builder requires a working pip - add an ensurepip fix via setup_dockerfile_commands to handle task images with broken pip.
3. The host cwd leaked into the Modal sandbox - add /home/ to the host prefix check.
4. Tilde (~) is not expanded by subprocess.run(cwd=) in sandboxes - use /root instead.
5. install_pipx must stay True for swerex-remote to be available.

Dependencies also needed (not in this commit):
- git submodule update --init mini-swe-agent
- uv pip install swe-rex boto3
This commit is contained in:
parent
2c97bf3936
commit
d7f4db53f5
3 changed files with 26 additions and 4 deletions
|
|
@ -114,11 +114,27 @@ def _patch_swerex_modal():
|
||||||
self._worker = _AsyncWorker()
|
self._worker = _AsyncWorker()
|
||||||
self._worker.start()
|
self._worker.start()
|
||||||
|
|
||||||
|
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
|
||||||
|
# Modal requires `python -m pip` to work during image build, but some
|
||||||
|
# task images (e.g., TBLite's broken-python) have intentionally broken pip.
|
||||||
|
# Fix: remove any existing pip files (package and dist-info) from site-packages
|
||||||
|
# and reinstall pip via ensurepip before Modal tries to use it. Intended to be harmless for images where pip already works.
|
||||||
|
import modal as _modal
|
||||||
|
image_spec = self.config.image
|
||||||
|
if isinstance(image_spec, str):
|
||||||
|
image_spec = _modal.Image.from_registry(
|
||||||
|
image_spec,
|
||||||
|
setup_dockerfile_commands=[
|
||||||
|
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
|
||||||
|
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
# Create AND start the deployment entirely on the worker's loop/thread
|
# Create AND start the deployment entirely on the worker's loop/thread
|
||||||
# so all gRPC channels and async state are bound to that loop
|
# so all gRPC channels and async state are bound to that loop
|
||||||
async def _create_and_start():
|
async def _create_and_start():
|
||||||
deployment = ModalDeployment(
|
deployment = ModalDeployment(
|
||||||
image=self.config.image,
|
image=image_spec,
|
||||||
startup_timeout=self.config.startup_timeout,
|
startup_timeout=self.config.startup_timeout,
|
||||||
runtime_timeout=self.config.runtime_timeout,
|
runtime_timeout=self.config.runtime_timeout,
|
||||||
deployment_timeout=self.config.deployment_timeout,
|
deployment_timeout=self.config.deployment_timeout,
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ class ModalEnvironment(BaseEnvironment):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
image: str,
|
image: str,
|
||||||
cwd: str = "~",
|
cwd: str = "/root",
|
||||||
timeout: int = 60,
|
timeout: int = 60,
|
||||||
modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
|
modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
persistent_filesystem: bool = True,
|
persistent_filesystem: bool = True,
|
||||||
|
|
@ -95,6 +95,7 @@ class ModalEnvironment(BaseEnvironment):
|
||||||
startup_timeout=180.0,
|
startup_timeout=180.0,
|
||||||
runtime_timeout=3600.0,
|
runtime_timeout=3600.0,
|
||||||
modal_sandbox_kwargs=sandbox_kwargs,
|
modal_sandbox_kwargs=sandbox_kwargs,
|
||||||
|
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
|
||||||
)
|
)
|
||||||
|
|
||||||
def execute(self, command: str, cwd: str = "", *,
|
def execute(self, command: str, cwd: str = "", *,
|
||||||
|
|
|
||||||
|
|
@ -463,7 +463,7 @@ def _get_env_config() -> Dict[str, Any]:
|
||||||
if env_type == "local":
|
if env_type == "local":
|
||||||
default_cwd = os.getcwd()
|
default_cwd = os.getcwd()
|
||||||
else:
|
else:
|
||||||
default_cwd = "~"
|
default_cwd = "/root"
|
||||||
|
|
||||||
# Read TERMINAL_CWD but sanity-check it for container backends.
|
# Read TERMINAL_CWD but sanity-check it for container backends.
|
||||||
# If the CWD looks like a host-local path that can't exist inside a
|
# If the CWD looks like a host-local path that can't exist inside a
|
||||||
|
|
@ -553,7 +553,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
||||||
if memory > 0:
|
if memory > 0:
|
||||||
sandbox_kwargs["memory"] = memory
|
sandbox_kwargs["memory"] = memory
|
||||||
if disk > 0:
|
if disk > 0:
|
||||||
sandbox_kwargs["ephemeral_disk"] = disk
|
try:
|
||||||
|
import inspect, modal
|
||||||
|
if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters:
|
||||||
|
sandbox_kwargs["ephemeral_disk"] = disk
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return _ModalEnvironment(
|
return _ModalEnvironment(
|
||||||
image=image, cwd=cwd, timeout=timeout,
|
image=image, cwd=cwd, timeout=timeout,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue