slow completions over group_size 4, debugging added

2026-05-15 04:12:25 +00:00 · 2026-02-05 10:57:13 +10:00 · 2026-02-05 10:57:13 +10:00 · 88286f6da3
commit 88286f6da3
parent 5b82190460
3 changed files with 22 additions and 5 deletions
--- a/atropos/envs/swe_smith_oracle_env.py
+++ b/atropos/envs/swe_smith_oracle_env.py
@@ -6,6 +6,8 @@ This environment is intentionally minimal:
 - runs an AtroposAgent tool loop to apply a fix
 - verifies by running pytest nodeids from the dataset (reward = pass/fail)
 - Python only (no multi-language support currently, need to properly build & add to dropbox)
+- TODO: Get the other non-Python sandboxes up and running, then add a config knob to switch between them per row
+- TODO: Also add to Docker Hub

 Dataset: NousResearch/SWE-smith-oracle (train; does NOT use SWE-bench eval set).
 """
@@ -185,6 +187,8 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
        # The dataset "text" field can be extremely large (e.g. includes large code blobs
        # and long test lists). In local dev and bring-up runs this can make the first LLM
        # call appear "hung" while the model chews through a massive prompt. Keep a cap.
+
+        # TODO: Remove, smoke test only
        def _cap(s: str, n: int) -> tuple[str, bool]:
            if len(s) <= n:
                return s, False
@@ -200,6 +204,7 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):

        repo_dir = self._repo_name(item)
        verify_note = ""
+        # TODO: Remove, smoke testing only
        if self.config.verification_mode == "install":
            verify_note = (
                "\nVerification for this run is INSTALL-ONLY:\n"
@@ -272,7 +277,9 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):

        # Prefer a lightweight "fetch by sha" to avoid pulling full history.
        # If it fails (some servers disallow fetching unadvertised objects, or we hit
-        # shallow-object edge cases), fall back to a full clone.
+        # shallow-object edge cases), fall back to a full clone.
+
+        # TODO: This should probably be done after the sandbox build and before setting up the worktree.
        clone_attempts: list[tuple[str, str]] = []
        clone_attempts.append(
            (