Merge branch 'master' into 8099-babysit-all-srun
author: Tom Clegg <tom@curoverse.com>
Wed, 2 Mar 2016 17:16:19 +0000 (12:16 -0500)
committer: Tom Clegg <tom@curoverse.com>
Wed, 2 Mar 2016 17:16:19 +0000 (12:16 -0500)
Conflicts:
sdk/cli/bin/crunch-job

1  2 
sdk/cli/bin/crunch-job

index 14dac6a1437e04b428c2b9ef6aad4b7825b8560e,ca6c47bfcef2093f478836263d2d1e0355b1f132..1a9dac30081ef03f55d5c1ce49c48898c63224c3
@@@ -1472,11 -1482,11 +1474,11 @@@ sub preprocess_stder
        ban_node_by_slot($job_slot_index);
      }
      elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
 -      $jobstep[$job]->{tempfail} = 1;
 -      ban_node_by_slot($jobstep[$job]->{slotindex});
 +      $jobstep[$jobstepidx]->{tempfail} = 1;
 +      ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
      }
-     elsif ($line =~ /arvados\.errors\.Keep/) {
+     elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
 -      $jobstep[$job]->{tempfail} = 1;
 +      $jobstep[$jobstepidx]->{tempfail} = 1;
      }
    }
  }