From 88286f6da3a237288935a8eecdf9111259ba2c29 Mon Sep 17 00:00:00 2001 From: Shannon Sands Date: Thu, 5 Feb 2026 10:57:13 +1000 Subject: [PATCH] slow completions over group_size 4, debugging added --- atropos/agent/atropos_agent.py | 16 ++++++++++++---- atropos/envs/agent_env.py | 2 ++ atropos/envs/swe_smith_oracle_env.py | 9 ++++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/atropos/agent/atropos_agent.py b/atropos/agent/atropos_agent.py index 6d995c53b3..254a7953ae 100644 --- a/atropos/agent/atropos_agent.py +++ b/atropos/agent/atropos_agent.py @@ -353,17 +353,25 @@ class AtroposAgent: if not wait_every_s or wait_every_s <= 0: return await managed.chat_completion(**chat_kwargs) + # Heartbeat mode: wait in chunks without cancelling the underlying request. + # NOTE: do NOT use `asyncio.wait_for(task, timeout=...)` here, because a timeout + # will cancel the task and surface as `CancelledError` on the next loop. task = asyncio.create_task(managed.chat_completion(**chat_kwargs)) t0 = time.perf_counter() - while True: - try: - return await asyncio.wait_for(task, timeout=wait_every_s) - except TimeoutError: + try: + while True: + done, _pending = await asyncio.wait({task}, timeout=wait_every_s) + if task in done: + return task.result() + waited = time.perf_counter() - t0 print( f"[AtroposAgent] step={step_num} still waiting for chat_completion... 
({waited:.1f}s)", flush=True, ) + except asyncio.CancelledError: + task.cancel() + raise try: if timeout_s and timeout_s > 0: diff --git a/atropos/envs/agent_env.py b/atropos/envs/agent_env.py index 5c18e2fb81..5596266eb3 100644 --- a/atropos/envs/agent_env.py +++ b/atropos/envs/agent_env.py @@ -448,6 +448,8 @@ class AgentEnv(BaseEnv, ABC, Generic[AgentEnvConfigT]): if len(items) != self.config.group_size: return None, backlog + + # TODO: Make sure logprobs are included group: ScoredDataGroup = ScoredDataGroup( tokens=[], diff --git a/atropos/envs/swe_smith_oracle_env.py b/atropos/envs/swe_smith_oracle_env.py index 69dd906a1b..587bd440d1 100644 --- a/atropos/envs/swe_smith_oracle_env.py +++ b/atropos/envs/swe_smith_oracle_env.py @@ -6,6 +6,8 @@ This environment is intentionally minimal: - runs an AtroposAgent tool loop to apply a fix - verifies by running pytest nodeids from the dataset (reward = pass/fail) - Python only (no multi-language support currently, need to properly bauild & add to dropbox) +- TODO: Get the other non-Python sandboxes up and running, then add a config knob to switch between them per row +- TODO: Also add to Docker Hub Dataset: NousResearch/SWE-smith-oracle (train; does NOT use SWE-bench eval set). """ @@ -185,6 +187,8 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]): # The dataset "text" field can be extremely large (e.g. includes large code blobs # and long test lists). In local dev and bring-up runs this can make the first LLM # call appear "hung" while the model chews through a massive prompt. Keep a cap. 
+ + # TODO: Remove, smoke test only def _cap(s: str, n: int) -> tuple[str, bool]: if len(s) <= n: return s, False @@ -200,6 +204,7 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]): repo_dir = self._repo_name(item) verify_note = "" + # TODO: Remove, smoke testing only if self.config.verification_mode == "install": verify_note = ( "\nVerification for this run is INSTALL-ONLY:\n" @@ -272,7 +277,9 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]): # Prefer a lightweight "fetch by sha" to avoid pulling full history. # If it fails (some servers disallow fetching unadvertised objects, or we hit - # shallow-object edge cases), fall back to a full clone. + # shallow-object edge cases), fall back to a full clone + + # TODO: tbh, should just do this before setting up worktree & after sandbox build clone_attempts: list[tuple[str, str]] = [] clone_attempts.append( (