Merge branch 'master' into 6518-crunch2-dispatch-slurm
[arvados.git] / sdk / cli / bin / crunch-job
index 1a9dac30081ef03f55d5c1ce49c48898c63224c3..7c2855c5ea72829d0e8a8cd73a849b6feaa44c56 100755 (executable)
@@ -432,7 +432,7 @@ fi
 
   # Determine whether this version of Docker supports memory+swap limits.
   ($exited, $stdout, $stderr) = srun_sync(
-    ["srun", "--nodelist=" . $node[0]],
+    ["srun", "--nodes=1"],
     [$docker_bin, 'run', '--help'],
     {label => "check --memory-swap feature"});
   $docker_limitmem = ($stdout =~ /--memory-swap/);
@@ -455,7 +455,7 @@ fi
       $try_user_arg = "--user=$try_user";
     }
     my ($exited, $stdout, $stderr) = srun_sync(
-      ["srun", "--nodelist=" . $node[0]],
+      ["srun", "--nodes=1"],
       ["/bin/sh", "-ec",
        "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
       {label => $label});
@@ -1134,7 +1134,9 @@ sub update_progress_stats
 sub reapchildren
 {
   my $children_reaped = 0;
-  while ((my $pid = waitpid (-1, WNOHANG)) > 0)
+  my @successful_task_uuids = ();
+
+  while((my $pid = waitpid (-1, WNOHANG)) > 0)
   {
     my $childstatus = $?;
 
@@ -1207,8 +1209,9 @@ sub reapchildren
       push @jobstep_todo, $jobstepidx;
       $Job->{'tasks_summary'}->{'failed'}++;
     }
-    else
+    else # task_success
     {
+      push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
       ++$thisround_succeeded;
       $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
       $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
@@ -1231,34 +1234,36 @@ sub reapchildren
     push @freeslot, $proc{$pid}->{slot};
     delete $proc{$pid};
 
-    if ($task_success) {
-      # Load new tasks
-      my $newtask_list = [];
-      my $newtask_results;
-      do {
-        $newtask_results = api_call(
-          "job_tasks/list",
-          'where' => {
-            'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
-          },
-          'order' => 'qsequence',
-          'offset' => scalar(@$newtask_list),
-            );
-        push(@$newtask_list, @{$newtask_results->{items}});
-      } while (@{$newtask_results->{items}});
-      foreach my $arvados_task (@$newtask_list) {
-        my $jobstep = {
-          'level' => $arvados_task->{'sequence'},
-          'failures' => 0,
-          'arvados_task' => $arvados_task
-        };
-        push @jobstep, $jobstep;
-        push @jobstep_todo, $#jobstep;
-      }
-    }
     $progress_is_dirty = 1;
   }
 
+  if (scalar(@successful_task_uuids) > 0)
+  {
+    Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
+    # Load new tasks
+    my $newtask_list = [];
+    my $newtask_results;
+    do {
+      $newtask_results = api_call(
+        "job_tasks/list",
+        'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
+        'order' => 'qsequence',
+        'offset' => scalar(@$newtask_list),
+          );
+      push(@$newtask_list, @{$newtask_results->{items}});
+    } while (@{$newtask_results->{items}});
+    Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
+    foreach my $arvados_task (@$newtask_list) {
+      my $jobstep = {
+        'level' => $arvados_task->{'sequence'},
+        'failures' => 0,
+        'arvados_task' => $arvados_task
+      };
+      push @jobstep, $jobstep;
+      push @jobstep_todo, $#jobstep;
+    }
+  }
+
   return $children_reaped;
 }