fixed infinite loop on agent errors

Shannon Sands 2026-02-04 14:25:08 +10:00
parent 5a9c98a771
commit 7130fa50cb
4 changed files with 144 additions and 23 deletions


@@ -34,20 +34,23 @@ load_dotenv()
 #
 # IMPORTANT: In training-mode environments we want "raw text in -> raw text out" and we
 # parse tool calls from completion text. Do not rely on server-specific `tool_calls` fields.
-AGENT_SYSTEM_PROMPT = """You are a function-calling AI model.
+AGENT_SYSTEM_PROMPT = """You are a deep thinking AI. You MUST enclose your internal reasoning inside <think>...</think> tags.
+You are a function calling AI model.
 You are provided with function signatures within <tools></tools> XML tags.
-You may call one or more functions to assist with the user query. If available tools are not relevant,
-respond in natural language.
-After calling & executing a function, you will be provided with function results within
-<tool_response></tool_response> XML tags.
+You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.
+After calling & executing a function, you will be provided with function results within <tool_response></tool_response> XML tags.
 Here are the available tools:
 <tools>
 {tools_json}
 </tools>
+Use the following JSON schema for each tool call you will make:
+{"title": "FunctionCall", "type": "object", "properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"]}
 ## REQUIRED TOOL FORMAT
 When you decide to call a tool, your assistant message MUST be:
@@ -55,10 +58,14 @@ When you decide to call a tool, your assistant message MUST be:
 2) one or more <tool_call>...</tool_call> blocks,
 and NOTHING else in that message.
-For each tool call, output a JSON object with this schema:
-{"name": "function_name", "arguments": { ... }}
-Each tool call MUST be enclosed within <tool_call></tool_call> XML tags.
+If you need to explain anything, put it inside <think>. Do NOT write natural language outside <think> or <tool_call>.
+For each function call return a JSON object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+<tool_call>
+{"name": "<function-name>", "arguments": {"arg1": "value1"}}
+</tool_call>
+Each <tool_call> must be on its own and contain ONLY the JSON object (no extra text).
 The JSON inside <tool_call> MUST be valid JSON with double quotes.
 Do NOT output <tool_response> in an assistant message.
@@ -66,27 +73,44 @@ Do NOT output <tool_response> in an assistant message.
 After you receive tool results, you may either call more tools (same required format) or provide the final answer.
 When providing the final answer, do NOT include any <tool_call> blocks.
+## TERMINAL TOOL NOTES
+- Commands execute under POSIX `/bin/sh` (not bash).
+- Each tool call runs in a fresh shell: environment changes (like `cd` or venv activation) do not persist across tool calls.
+- Avoid bash-only features like `source`, `[[ ... ]]`, or process substitution.
+- Prefer explicit venv usage:
+  - `python -m venv .venv && . .venv/bin/activate && python -m pip install -e .` (POSIX `.` activation), or
+  - `.venv/bin/python -m pip install -e .` (no activation required).
 ## ICL (examples)
 User: Show the current directory.
 Assistant:
-<think>I should use the terminal tool to print the current directory.</think>
-<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>
+<think>I should run pwd.</think>
+<tool_call>
+{"name": "terminal", "arguments": {"command": "pwd"}}
+</tool_call>
 User: <tool_response>{"success": true, "output": "/tmp\\n"}</tool_response>
 Assistant: /tmp
 User: List files, then count them.
 Assistant:
-<think>I should list files and count lines.</think>
-<tool_call>{"name": "terminal", "arguments": {"command": "ls -1 | wc -l"}}</tool_call>
+<think>I should count files.</think>
+<tool_call>
+{"name": "terminal", "arguments": {"command": "ls -1 | wc -l"}}
+</tool_call>
 User: <tool_response>{"success": true, "output": "3\\n"}</tool_response>
 Assistant: 3
-User: Run pwd, then print ok.
+User: Run pwd, then print ok (two tool calls).
 Assistant:
-<think>I should run pwd, then run a command that prints ok.</think>
-<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>
-<tool_call>{"name": "terminal", "arguments": {"command": "echo ok"}}</tool_call>
+<think>I should run two commands.</think>
+<tool_call>
+{"name": "terminal", "arguments": {"command": "pwd"}}
+</tool_call>
+<tool_call>
+{"name": "terminal", "arguments": {"command": "echo ok"}}
+</tool_call>
 User: <tool_response>{"success": true, "output": "/tmp\\n"}</tool_response>
 User: <tool_response>{"success": true, "output": "ok\\n"}</tool_response>
 Assistant: ok
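The prompt above commits the model to a strict `<tool_call>` wire format, and the header comment says tool calls are parsed straight from completion text. As a rough sketch only (the repository's actual `ToolCall.parse_from_text` is not shown in this diff and may differ), such a parser can be a regex over the tagged blocks plus defensive JSON decoding:

```python
import json
import re

# Matches each <tool_call>...</tool_call> block; DOTALL because the JSON may span lines.
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL)


def parse_tool_calls(text: str) -> list[dict]:
    """Extract {"name": ..., "arguments": ...} objects from raw completion text.

    Illustrative only: a stand-in for what ToolCall.parse_from_text implies,
    not the repository's actual implementation.
    """
    calls = []
    for raw in TOOL_CALL_RE.findall(text):
        try:
            obj = json.loads(raw)
        except json.JSONDecodeError:
            continue  # skip malformed JSON instead of crashing the rollout
        if isinstance(obj, dict) and isinstance(obj.get("name"), str):
            calls.append(obj)
    return calls


completion = (
    "<think>I should run two commands.</think>\n"
    '<tool_call>\n{"name": "terminal", "arguments": {"command": "pwd"}}\n</tool_call>\n'
    '<tool_call>\n{"name": "terminal", "arguments": {"command": "echo ok"}}\n</tool_call>'
)
print([c["arguments"]["command"] for c in parse_tool_calls(completion)])  # ['pwd', 'echo ok']
```

Ignoring malformed blocks (rather than raising) matters in training mode, where the model's output is untrusted and a parse failure should read as "no tool call", not an exception.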
@@ -337,6 +361,9 @@ class AtroposAgent:
         final_response = ""
         final_node = None
         final_prompt_messages: Optional[List[Dict[str, str]]] = None
+        last_node = None
+        last_prompt_messages: Optional[List[Dict[str, str]]] = None
+        last_response_text: str = ""

         # Use ManagedServer for automatic token tracking
         async with self._managed() as managed:
@@ -384,6 +411,9 @@ class AtroposAgent:
                 # Some OpenAI-compatible servers populate `message.reasoning` and leave `content=""`.
                 response_text = (msg.content or "") or (getattr(msg, "reasoning", None) or "")
                 tool_calls = ToolCall.parse_from_text(response_text)
+                last_node = current_node
+                last_prompt_messages = prompt_messages
+                last_response_text = response_text

                 step = AgentStep(
                     step_number=step_num + 1,
@@ -419,11 +449,39 @@ class AtroposAgent:
             else:
                 # Reached max steps without completing
+                # Return a failure result but include the last observed completion so callers can
+                # record the trajectory (score=0) without triggering retries.
+                final_response = last_response_text or final_response
+                final_node = last_node
+                final_prompt_messages = last_prompt_messages
+                trajectory_data = None
+                if final_node:
+                    trajectory_data = SequenceData.from_sequence_node(final_node)
+                elif final_prompt_messages is not None and self.tokenizer is not None:
+                    if hasattr(self.tokenizer, "apply_chat_template"):
+                        prompt_text = self.tokenizer.apply_chat_template(
+                            final_prompt_messages, tokenize=False, add_generation_prompt=True
+                        )
+                        prompt_tokens = self.tokenizer.encode(prompt_text, add_special_tokens=False)
+                    else:
+                        prompt_text = "\n".join([f"{m['role']}: {m['content']}" for m in final_prompt_messages])
+                        prompt_tokens = self.tokenizer.encode(prompt_text, add_special_tokens=True)
+                    output_tokens = self.tokenizer.encode(final_response, add_special_tokens=False)
+                    tokens = prompt_tokens + output_tokens
+                    masked_tokens = ([-100] * len(prompt_tokens)) + output_tokens
+                    logprobs = ([1.0] * len(prompt_tokens)) + ([0.0] * len(output_tokens))
+                    trajectory_data = SequenceData(
+                        full_text=f"{prompt_text}{final_response}",
+                        tokens=tokens,
+                        masked_tokens=masked_tokens,
+                        logprobs=logprobs,
+                    )
                 return AgentResult(
                     success=False,
                     final_response=final_response,
                     steps=steps,
                     error=f"Reached maximum steps ({self.config.max_steps})",
+                    trajectory_data=trajectory_data,
                 )

         # Build result with trajectory data
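When no `SequenceData` exists, the fallback above fabricates one, masking every prompt token with -100 so only the model's completion contributes to the loss. A toy sketch of that masking convention, using made-up token ids in place of real tokenizer output:

```python
# Hypothetical token ids standing in for tokenizer.encode() results.
prompt_tokens = [101, 7592, 2088]   # prompt: excluded from the loss
output_tokens = [2023, 2003, 102]   # completion: trained on

tokens = prompt_tokens + output_tokens
# -100 is the conventional "ignore" label for cross-entropy losses,
# so prompt positions contribute nothing to the gradient.
masked_tokens = [-100] * len(prompt_tokens) + output_tokens
# Placeholder logprobs, mirroring the diff's convention for fabricated data.
logprobs = [1.0] * len(prompt_tokens) + [0.0] * len(output_tokens)

assert len(tokens) == len(masked_tokens) == len(logprobs)
print(masked_tokens)  # [-100, -100, -100, 2023, 2003, 102]
```

The three parallel lists must stay the same length per position, which is why the fabricated `logprobs` pads the prompt region too.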


@@ -362,10 +362,37 @@ class AgentEnv(BaseEnv, ABC, Generic[AgentEnvConfigT]):
             flush=True,
         )
         if not result.success or result.trajectory_data is None:
-            return None, []
+            # Do not trigger BaseEnv retries for agent failures.
+            # Record the trajectory with score 0.0 so training/eval can see the failure mode.
+            messages = [{"role": "system", "content": agent._build_system_prompt()}]  # noqa: SLF001
+            messages.append({"role": "user", "content": task})
+            for step in result.steps:
+                messages.append({"role": "assistant", "content": step.assistant_message})
+                if step.tool_results:
+                    tool_text = "\n".join(r.to_xml() for r in step.tool_results)
+                    messages.append({"role": "user", "content": tool_text})
+            scored: ScoredDataItem = {
+                "tokens": (result.trajectory_data.tokens if result.trajectory_data else []),
+                "masks": (result.trajectory_data.masked_tokens if result.trajectory_data else []),
+                "scores": 0.0,
+            }
+            if self.config.include_messages:
+                # Record a final failure marker as a user-side tool_response-like block so it survives templates.
+                import json
+                err = result.error or "agent_failed"
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": f"<tool_response>{json.dumps({'success': False, 'error': err})}</tool_response>",
+                    }
+                )
+                scored["messages"] = messages
+            return scored, []

         print(f"[AgentEnv] tid={trajectory_id} verify_and_score_trajectory() start", flush=True)
-        score, _score_metadata = await self.verify_and_score_trajectory(
+        score, score_metadata = await self.verify_and_score_trajectory(
             item,
             result.final_response,
             trajectory_id=trajectory_id,
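This hunk is the commit's headline fix: returning `None, []` told the caller that no data was collected, so the retry machinery spun on every deterministic agent failure. A stripped-down illustration of the two contracts, with a hypothetical `collect` loop standing in for BaseEnv's real retry logic:

```python
def collect(rollout, max_attempts=3):
    """Retry until the rollout yields a scored item; None means 'try again'."""
    for attempt in range(1, max_attempts + 1):
        scored = rollout()
        if scored is not None:
            return scored, attempt
    # Old behavior: a deterministic agent error burned every attempt
    # (and with unbounded retries, looped forever).
    raise RuntimeError("gave up after retries")


def failing_agent_old():
    return None  # failure reported as "no data" -> retried


def failing_agent_new():
    # Failure recorded as a real scored item with score 0.0 -> accepted first try.
    return {"tokens": [], "masks": [], "scores": 0.0}


scored, attempts = collect(failing_agent_new)
print(attempts)  # 1
```

Recording the failure at score 0.0 also means the trainer actually sees the failure mode instead of the trajectory silently disappearing.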
@@ -387,6 +414,14 @@ class AgentEnv(BaseEnv, ABC, Generic[AgentEnvConfigT]):
                 tool_text = "\n".join(r.to_xml() for r in step.tool_results)
                 messages.append({"role": "user", "content": tool_text})
+        # Optional: allow env verification to attach additional messages (e.g. install logs).
+        if self.config.include_messages and isinstance(score_metadata, dict):
+            extra = score_metadata.get("verification_messages")
+            if isinstance(extra, list):
+                for m in extra:
+                    if isinstance(m, dict) and isinstance(m.get("role"), str) and isinstance(m.get("content"), str):
+                        messages.append({"role": m["role"], "content": m["content"]})
         scored: ScoredDataItem = {
             "tokens": result.trajectory_data.tokens,
             "masks": result.trajectory_data.masked_tokens,


@@ -234,6 +234,8 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
             "Constraints:\n"
             "- Use a workspace-local virtualenv (e.g. inside the repo at ./.venv) to avoid cross-run contamination.\n"
             "- Use non-interactive commands only.\n\n"
+            "- Terminal commands run under POSIX /bin/sh and each tool call runs in a fresh shell (no persisted env vars).\n"
+            "  Avoid bash-only `source`; prefer `. .venv/bin/activate` or `.venv/bin/python ...`.\n\n"
             f"{verify_note}\n"
             f"{trunc_note}\n"
             "Problem statement:\n"
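The new constraint describes real sandbox behavior: every tool call gets its own `/bin/sh`, so directory changes and exported variables evaporate between calls. The effect can be reproduced directly with `subprocess` (this is an illustration of the semantics on a POSIX system, not the sandbox's actual executor):

```python
import subprocess


def sh(command: str) -> str:
    # Each call spawns a fresh /bin/sh, mirroring one terminal tool call.
    result = subprocess.run(
        ["/bin/sh", "-c", command], capture_output=True, text=True
    )
    return result.stdout.strip()


sh("cd /tmp")                  # state change in shell #1...
print(sh("pwd"))               # ...shell #2 starts from the Python process's cwd; the cd did not persist
print(sh("FOO=1; echo $FOO"))  # within ONE call, state persists: prints 1
print(sh("echo $FOO"))         # new shell: FOO is unset, prints an empty line
```

This is why the prompt steers agents toward `.venv/bin/python ...` or chaining `. .venv/bin/activate && ...` inside a single command, rather than activating a venv in one tool call and using it in the next.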
@@ -365,13 +367,20 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
         *,
         trajectory_id: str,
         exec_tool,
-        agent_result=None,  # noqa: ARG002
+        agent_result=None,
         workspace_meta: Optional[Dict[str, Any]] = None,
     ) -> tuple[float, Dict[str, Any]]:
-        _ = trajectory_id
         repo_dir = self._repo_name(item)
         if self.config.verification_mode == "install":
+            # Training correctness: do not reward trajectories that never actually used tools.
+            if agent_result is not None and getattr(agent_result, "total_tool_calls", 0) <= 0:
+                return 0.0, {
+                    "verification_mode": "install",
+                    "error": "No tool calls were made by the agent",
+                }
             print(f"[SweSmithOracleEnv] tid={trajectory_id} verify (install): running pip install -e .", flush=True)
             t0 = time.perf_counter()
             install_cmd = (
@@ -394,6 +403,14 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
                 "verification_mode": "install",
                 "install_success": ok,
                 "error": res.error,
+                "verification_messages": [{"role": "user", "content": res.to_xml()}],
+            }
+
+        # Training correctness: do not reward trajectories that never actually used tools.
+        if agent_result is not None and getattr(agent_result, "total_tool_calls", 0) <= 0:
+            return 0.0, {
+                "verification_mode": "pytest",
+                "error": "No tool calls were made by the agent",
             }

         nodeids = self._tests_for_item(item)
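The zero-score guard uses `getattr` with a default of 0, so an `AgentResult` that lacks a `total_tool_calls` field counts as "never used tools", while a missing result entirely (`None`) skips the check. A minimal illustration of the predicate (the `should_zero_score` helper name is hypothetical):

```python
from types import SimpleNamespace


def should_zero_score(agent_result) -> bool:
    """True when a result exists but shows no tool usage, mirroring the diff's guard."""
    return agent_result is not None and getattr(agent_result, "total_tool_calls", 0) <= 0


print(should_zero_score(SimpleNamespace(total_tool_calls=3)))  # False: tools were used
print(should_zero_score(SimpleNamespace(total_tool_calls=0)))  # True: no tool calls
print(should_zero_score(SimpleNamespace()))                    # True: field absent defaults to 0
print(should_zero_score(None))                                 # False: no result to judge
```

The gate closes a reward-hacking hole: in install mode, a trajectory that wrote nothing could otherwise score 1.0 just because `pip install -e .` succeeds on the untouched repo.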
@@ -412,12 +429,14 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
         setup_res = await exec_tool(
             ToolCall(name="terminal", arguments={"command": setup_cmd, "timeout": self.config.install_timeout_s})
         )
+        verification_messages = [{"role": "user", "content": setup_res.to_xml()}]
         if not setup_res.success:
             return 0.0, {
                 "verification_mode": "pytest",
                 "phase": "install",
                 "error": setup_res.error,
                 "output": setup_res.output,
+                "verification_messages": verification_messages,
             }
@@ -430,10 +449,18 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
                     arguments={"command": cmd, "timeout": self.config.test_timeout_s},
                 )
             )
+            verification_messages.append({"role": "user", "content": res.to_xml()})
             if not res.success:
-                return 0.0, {"failed_chunk": chunk_idx, "error": res.error, "output": res.output}
+                return 0.0, {
+                    "verification_mode": "pytest",
+                    "phase": "pytest",
+                    "failed_chunk": chunk_idx,
+                    "error": res.error,
+                    "output": res.output,
+                    "verification_messages": verification_messages,
+                }

-        return 1.0, {"verification_mode": "pytest", "passed": True}
+        return 1.0, {"verification_mode": "pytest", "passed": True, "verification_messages": verification_messages}

     async def score_trajectory(self, item: Item, final_response: str) -> float:
         # Not used; scoring happens in verify_and_score_trajectory.


@@ -23,7 +23,9 @@ class TerminalTool(Tool):
             description=(
                 "Execute a command inside the sandbox slot workspace and return stdout/stderr. "
                 "Filesystem persists within a trajectory slot. Background processes are not supported "
-                "in stateless mode."
+                "in stateless mode. Commands run under POSIX /bin/sh and each tool call runs in a fresh "
+                "shell (no persisted env vars). Avoid bash-only syntax like `source`; prefer `. .venv/bin/activate` "
+                "or invoke `.venv/bin/python ...` directly."
             ),
             parameters={
                 "command": {"type": "string", "description": "The command to execute"},
@@ -95,4 +97,3 @@ class WriteFileTool(Tool):
     async def execute(self, **_kwargs) -> ToolResult:
         return ToolResult(success=False, error="write_file must be executed via ToolExecutor inside the sandbox")