From beac2ee06ab65dc2fb8d5ef734faf236631e399a Mon Sep 17 00:00:00 2001 From: Shannon Sands Date: Thu, 5 Feb 2026 14:54:34 +1000 Subject: [PATCH] Increase per-chat timeout (re: API issues) and tweak logging --- atropos/agent/atropos_agent.py | 6 +++--- atropos/envs/swe_smith_oracle_env.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/atropos/agent/atropos_agent.py b/atropos/agent/atropos_agent.py index 88edc53d358..2de2446ee95 100644 --- a/atropos/agent/atropos_agent.py +++ b/atropos/agent/atropos_agent.py @@ -438,12 +438,12 @@ class AtroposAgent: - `ATROPOS_AGENT_CHAT_TIMEOUT_S`: if set, wraps the await in `asyncio.wait_for`. - `ATROPOS_DEBUG_AGENT_WAIT_EVERY_S`: if set, prints a heartbeat while waiting. """ - # Hard guardrail: never allow a single chat completion to block for more than 2 minutes. + # Hard guardrail: never allow a single chat completion to block for too long. # This is essential for RL data-gen stability; long hangs should be treated as failures (score=0). 
timeout_s_raw = os.getenv("ATROPOS_AGENT_CHAT_TIMEOUT_S") - timeout_s_default = 120.0 + timeout_s_default = 240.0 timeout_s = float(timeout_s_raw) if timeout_s_raw else timeout_s_default - timeout_s = min(timeout_s, 120.0) + timeout_s = min(timeout_s, 240.0) wait_every_raw = os.getenv("ATROPOS_DEBUG_AGENT_WAIT_EVERY_S") wait_every_s = float(wait_every_raw) if wait_every_raw else None diff --git a/atropos/envs/swe_smith_oracle_env.py b/atropos/envs/swe_smith_oracle_env.py index fee0c9ab7bb..e35521a7c33 100644 --- a/atropos/envs/swe_smith_oracle_env.py +++ b/atropos/envs/swe_smith_oracle_env.py @@ -37,8 +37,11 @@ class SweSmithOracleEnvConfig(AgentEnvConfig): python_only: bool = Field(default=True, description="Filter to Python-evaluable rows") score_include_fail_to_pass: bool = Field( - default=False, - description="If true, score tests on PASS_TO_PASS ∪ FAIL_TO_PASS; else PASS_TO_PASS only.", + default=True, + description=( + "If true (default), score tests on PASS_TO_PASS ∪ FAIL_TO_PASS. " + "Disable to only run PASS_TO_PASS (faster but weaker signal)." + ), ) prompt_mode: str = Field( @@ -347,6 +350,10 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]): # Training correctness: do not reward trajectories that never actually used tools. if agent_result is not None and getattr(agent_result, "total_tool_calls", 0) <= 0: + print( + f"[SweSmithOracleEnv] tid={trajectory_id} verify (dataset_tests): no tool calls; score=0.0", + flush=True, + ) return 0.0, { "verification_mode": "dataset_tests", "error": "No tool calls were made by the agent",