Merge branch '6592-retry-if-cleanupfail' closes #6592
authorTom Clegg <tom@curoverse.com>
Thu, 30 Jul 2015 19:37:03 +0000 (15:37 -0400)
committerTom Clegg <tom@curoverse.com>
Thu, 30 Jul 2015 19:37:03 +0000 (15:37 -0400)
12 files changed:
sdk/cli/bin/crunch-job
sdk/cli/test/binstub_clean_fail/mount [new file with mode: 0755]
sdk/cli/test/binstub_docker_noop/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/true [new file with mode: 0755]
sdk/cli/test/test_arv-collection-create.rb
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-put.rb
sdk/cli/test/test_arv-run-pipeline-instance.rb
sdk/cli/test/test_arv-tag.rb
sdk/cli/test/test_crunch-job.rb [new file with mode: 0644]
sdk/perl/Makefile.PL

index c84d89bf4a5533c0549789fb88f19dd8de1b1ee2..13001e7f92d5962fa9917dc74bc96b4710f953ff 100755 (executable)
@@ -126,7 +126,7 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
-my $docker_bin = "/usr/bin/docker.io";
+my $docker_bin = "docker.io";
 GetOptions('force-unlock' => \$force_unlock,
            'git-dir=s' => \$git_dir,
            'job=s' => \$jobspec,
@@ -169,8 +169,7 @@ if ($jobspec =~ /^[-a-z\d]+$/)
 }
 else
 {
-  $Job = JSON::decode_json($jobspec);
-  $local_job = 1;
+  $local_job = JSON::decode_json($jobspec);
 }
 
 
@@ -178,7 +177,7 @@ else
 # at least able to run basic commands: they aren't down or severely
 # misconfigured.
 my $cmd = ['true'];
-if ($Job->{docker_image_locator}) {
+if (($Job || $local_job)->{docker_image_locator}) {
   $cmd = [$docker_bin, 'ps', '-q'];
 }
 Log(undef, "Sanity check is `@$cmd`");
@@ -208,15 +207,15 @@ else
 {
   if (!$resume_stash)
   {
-    map { croak ("No $_ specified") unless $Job->{$_} }
+    map { croak ("No $_ specified") unless $local_job->{$_} }
     qw(script script_version script_parameters);
   }
 
-  $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
-  $Job->{'started_at'} = gmtime;
-  $Job->{'state'} = 'Running';
+  $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
+  $local_job->{'started_at'} = gmtime;
+  $local_job->{'state'} = 'Running';
 
-  $Job = api_call("jobs/create", job => $Job);
+  $Job = api_call("jobs/create", job => $local_job);
 }
 $job_id = $Job->{'uuid'};
 
@@ -396,7 +395,7 @@ if (!defined $no_clear_tmp) {
     # TODO: When #5036 is done and widely deployed, we can get rid of the
     # regular expression and just unmount everything with type fuse.keep.
     srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
     exit (1);
   }
   while (1)
@@ -405,7 +404,10 @@ if (!defined $no_clear_tmp) {
     freeze_if_want_freeze ($cleanpid);
     select (undef, undef, undef, 0.1);
   }
-  Log (undef, "Cleanup command exited ".exit_status_s($?));
+  if ($?) {
+    Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+    exit(EX_RETRY_UNLOCKED);
+  }
 }
 
 # If this job requires a Docker image, install that.
@@ -596,7 +598,7 @@ else {
   unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
     croak("`$gitcmd rev-list` exited "
           .exit_status_s($?)
-          .", '$treeish' not found. Giving up.");
+          .", '$treeish' not found, giving up");
   }
   $commit = $1;
   Log(undef, "Version $treeish is commit $commit");
diff --git a/sdk/cli/test/binstub_clean_fail/mount b/sdk/cli/test/binstub_clean_fail/mount
new file mode 100755 (executable)
index 0000000..961ac28
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+echo >&2 Failing mount stub was called
+exit 1
diff --git a/sdk/cli/test/binstub_docker_noop/docker.io b/sdk/cli/test/binstub_docker_noop/docker.io
new file mode 100755 (executable)
index 0000000..af3a4e4
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+true
diff --git a/sdk/cli/test/binstub_sanity_check/docker.io b/sdk/cli/test/binstub_sanity_check/docker.io
new file mode 100755 (executable)
index 0000000..8f1569d
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 8
diff --git a/sdk/cli/test/binstub_sanity_check/true b/sdk/cli/test/binstub_sanity_check/true
new file mode 100755 (executable)
index 0000000..4b88b91
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 7
index 18bef403b761f52701fdc86b2919dac44de59e13..3dc4bdd434a101507fee3ebd8f2e5004e66cd49c 100644 (file)
@@ -7,8 +7,6 @@ class TestCollectionCreate < Minitest::Test
   end
 
   def test_small_collection
-    skip "Waiting unitl #4534 is implemented"
-
     uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s
     out, err = capture_subprocess_io do
       assert_arv('--format', 'uuid', 'collection', 'create', '--collection', {
index 67dd399a2456fe4a7c2a2a2cf4d86401d409e6d6..5e58014cbfa10d3b9b67a8b7cddca8b8676f646c 100644 (file)
@@ -30,14 +30,10 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dev_stdout
-    skip "Waiting unitl #4534 is implemented"
-
     test_file_to_stdout('/dev/stdout')
   end
 
   def test_file_to_stdout(specify_stdout_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
     end
@@ -46,8 +42,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
@@ -58,34 +52,30 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_no_overwrite_file
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_no_overwrite_file_in_dir
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_force_overwrite
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -99,8 +89,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_skip_existing
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -114,8 +102,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
@@ -142,28 +128,22 @@ class TestArvGet < Minitest::Test
   end
 
   def test_nonexistent_block
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6'
+      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_nonexistent_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6/', 'tmp/'
+      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_manifest_root_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
@@ -174,8 +154,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_manifest_root_to_dir_noslash
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
@@ -186,8 +164,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_display_md5sum
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -198,8 +174,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_md5sum_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -210,8 +184,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_sha1_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
@@ -222,8 +194,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_block_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator, 'tmp/foo'
@@ -236,8 +206,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
@@ -249,8 +217,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_partial_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
index 73513db56cb17ee5f6d88d151205f437e6d22107..2f20e18440a2ff61dde6b748d3b327587530b142 100644 (file)
@@ -22,8 +22,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -41,8 +39,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/foo')
     end
@@ -52,8 +48,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/empty_file')
     end
@@ -83,8 +77,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_filename_arg_with_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--filename', 'foo', './tmp/empty_file')
     end
@@ -94,8 +86,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_as_stream
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--as-stream', './tmp/foo')
     end
@@ -105,8 +95,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--progress', './tmp/foo')
     end
@@ -115,8 +103,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_batch_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--batch-progress', './tmp/foo')
     end
@@ -136,20 +122,14 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='--manifest')
   end
 
   def test_read_from_dev_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin(specify_stdin_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -168,22 +148,16 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as=nil,
                                            expect_filename='stdin')
   end
 
   def test_read_from_dev_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin_implicit_manifest(specify_stdin_as='-',
                                              expect_filename=nil)
-    skip "Waiting unitl #4534 is implemented"
-
     expect_filename = expect_filename || specify_stdin_as.split('/').last
     out, err = capture_subprocess_io do
       r,w = IO.pipe
index 8c8d1d8331ae05fcbda64a65289732188c66bcd8..cac89b37bc0555c4929c6efadf873c32aed01297 100644 (file)
@@ -5,8 +5,6 @@ class TestRunPipelineInstance < Minitest::Test
   end
 
   def test_run_pipeline_instance_get_help
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       system ('arv-run-pipeline-instance -h')
     end
index a5a1c94fff29227e0944afcae08383529cfe0b33..f4eba4651cbcd06494c41d1e05311dac663f65ed 100644 (file)
@@ -9,7 +9,7 @@ end
 class TestArvTag < Minitest::Test
 
   def test_no_args
-    skip "Waiting unitl #4534 is implemented"
+    skip "Waiting until #4534 is implemented"
 
     # arv-tag exits with failure if run with no args
     out, err = capture_subprocess_io do
diff --git a/sdk/cli/test/test_crunch-job.rb b/sdk/cli/test/test_crunch-job.rb
new file mode 100644 (file)
index 0000000..22d756a
--- /dev/null
@@ -0,0 +1,126 @@
+require 'minitest/autorun'
+
+class TestCrunchJob < Minitest::Test
+  SPECIAL_EXIT = {
+    EX_RETRY_UNLOCKED: 93,
+    EX_TEMPFAIL: 75,
+  }
+
+  JOBSPEC = {
+    grep_local: {
+      script: 'grep',
+      script_version: 'master',
+      repository: File.absolute_path('../../../..', __FILE__),
+      script_parameters: {foo: 'bar'},
+    },
+  }
+
+  def setup
+  end
+
+  def crunchjob
+    File.absolute_path '../../bin/crunch-job', __FILE__
+  end
+
+  # Return environment suitable for running crunch-job.
+  def crunchenv opts={}
+    env = ENV.to_h
+    env['CRUNCH_REFRESH_TRIGGER'] =
+      File.absolute_path('../../../../tmp/crunch-refresh-trigger', __FILE__)
+    env
+  end
+
+  def jobspec label
+    JOBSPEC[label].dup
+  end
+
+  # Encode job record to json and run it with crunch-job.
+  #
+  # opts[:binstubs] is an array of X where ./binstub_X is added to
+  # PATH in order to mock system programs.
+  def tryjobrecord jobrecord, opts={}
+    env = crunchenv
+    (opts[:binstubs] || []).each do |binstub|
+      env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
+    end
+    system env, crunchjob, '--job', jobrecord.to_json
+  end
+
+  def test_bogus_json
+    out, err = capture_subprocess_io do
+      system crunchenv, crunchjob, '--job', '"}{"'
+    end
+    assert_equal false, $?.success?
+    # Must not conflict with our special exit statuses
+    assert_jobfail $?
+    assert_match /JSON/, err
+  end
+
+  def test_fail_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 7/, err
+  end
+
+  def test_fail_docker_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 8/, err
+  end
+
+  def test_no_script_specified
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j.delete :script
+      tryjobrecord j
+    end
+    assert_match /No script specified/, err
+    assert_jobfail $?
+  end
+
+  def test_fail_clean_tmp
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      tryjobrecord j, binstubs: ['clean_fail']
+    end
+    assert_match /Failing mount stub was called/, err
+    assert_match /Clean work dirs: exit 1\n$/, err
+    assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
+  end
+
+  def test_docker_image_missing
+    skip 'API bug: it refuses to create this job in Running state'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['docker_noop']
+    end
+    assert_match /No Docker image hash found from locator/, err
+    assert_jobfail $?
+  end
+
+  def test_script_version_not_found_in_repository
+    bogus_version = 'f8b72707c1f5f740dbf1ed56eb429a36e0dee770'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:script_version] = bogus_version
+      tryjobrecord j
+    end
+    assert_match /'#{bogus_version}' not found, giving up/, err
+    assert_jobfail $?
+  end
+
+  # Ensure procstatus is not interpreted as a temporary infrastructure
+  # problem. Would be assert_http_4xx if this were http.
+  def assert_jobfail procstatus
+    refute_includes SPECIAL_EXIT.values, procstatus.exitstatus
+    assert_equal false, procstatus.success?
+  end
+end
index 21e31ad8055ce27e84f303c538789da050c021f6..d676d37a8a7dfb54fbb0429a548b4e5b90cac6e4 100644 (file)
@@ -6,5 +6,10 @@ use ExtUtils::MakeMaker;
 
 WriteMakefile(
     NAME            => 'Arvados',
-    VERSION_FROM    => 'lib/Arvados.pm'
+    VERSION_FROM    => 'lib/Arvados.pm',
+    PREREQ_PM       => {
+        'JSON'     => 0,
+        'LWP'      => 0,
+        'Net::SSL' => 0,
+    },
 );