Increase per-chat timeout (to work around API issues) and tweak logging

2026-05-15 04:12:25 +00:00 · 2026-02-05 14:54:34 +10:00 · 2026-02-05 14:54:34 +10:00 · beac2ee06a
commit beac2ee06a
parent 487487406d
2 changed files with 12 additions and 5 deletions
--- a/atropos/envs/swe_smith_oracle_env.py
+++ b/atropos/envs/swe_smith_oracle_env.py
@@ -37,8 +37,11 @@ class SweSmithOracleEnvConfig(AgentEnvConfig):

    python_only: bool = Field(default=True, description="Filter to Python-evaluable rows")
    score_include_fail_to_pass: bool = Field(
-        default=False,
-        description="If true, score tests on PASS_TO_PASS ∪ FAIL_TO_PASS; else PASS_TO_PASS only.",
+        default=True,
+        description=(
+            "If true (default), score tests on PASS_TO_PASS ∪ FAIL_TO_PASS. "
+            "Disable to only run PASS_TO_PASS (faster but weaker signal)."
+        ),
    )

    prompt_mode: str = Field(
@@ -347,6 +350,10 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):

        # Training correctness: do not reward trajectories that never actually used tools.
        if agent_result is not None and getattr(agent_result, "total_tool_calls", 0) <= 0:
+            print(
+                f"[SweSmithOracleEnv] tid={trajectory_id} verify (dataset_tests): no tool calls; score=0.0",
+                flush=True,
+            )
            return 0.0, {
                "verification_mode": "dataset_tests",
                "error": "No tool calls were made by the agent",