diff --git a/batch_runner.py b/batch_runner.py
index 9d21aebc35..2487d9fb19 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -98,10 +98,9 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
                     # Terminal wraps its response in a "content" field
                     if "content" in content_json and isinstance(content_json["content"], dict):
                         inner_content = content_json["content"]
-                        # Check for actual error (non-null error field or non-zero exit code)
-                        has_error = (inner_content.get("error") is not None or 
-                                   inner_content.get("exit_code", 0) != 0)
-                        if has_error:
+                        # Check for actual error (non-null error field)
+                        # Note: non-zero exit codes are not failures - the model can self-correct
+                        if inner_content.get("error") is not None:
                             is_success = False
                     
                     # Check for "success": false pattern used by some tools