Merge branch 'master' into 14715-keepprox-config
author Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 14 Aug 2019 14:49:48 +0000 (10:49 -0400)
committer Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 14 Aug 2019 14:49:48 +0000 (10:49 -0400)
refs #14715

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

202 files changed:
apps/workbench/fpm-info.sh
apps/workbench/test/controllers/pipeline_instances_controller_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/helpers/repository_stub_helper.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/jobs_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/pipeline_templates_test.rb
apps/workbench/test/integration/projects_test.rb
apps/workbench/test/integration/repositories_browse_test.rb
apps/workbench/test/integration/websockets_test.rb
apps/workbench/test/integration/work_units_test.rb
apps/workbench/test/unit/disabled_api_test.rb
build/package-build-dockerfiles/centos7/Dockerfile
build/package-build-dockerfiles/debian9/Dockerfile
build/package-build-dockerfiles/ubuntu1604/Dockerfile
build/package-build-dockerfiles/ubuntu1804/Dockerfile
build/run-build-packages-one-target.sh
build/run-build-packages.sh
build/run-build-test-packages-one-target.sh
build/run-library.sh
cmd/arvados-client/cmd.go
crunch_scripts/GATK2-VariantFiltration [deleted file]
crunch_scripts/GATK2-bqsr [deleted file]
crunch_scripts/GATK2-merge-call [deleted file]
crunch_scripts/GATK2-realign [deleted file]
crunch_scripts/arvados-bcbio-nextgen.py [deleted file]
crunch_scripts/arvados_bwa.py [deleted file]
crunch_scripts/arvados_gatk2.py [deleted file]
crunch_scripts/arvados_ipc.py [deleted file]
crunch_scripts/arvados_picard.py [deleted file]
crunch_scripts/arvados_samtools.py [deleted file]
crunch_scripts/bwa-aln [deleted file]
crunch_scripts/bwa-index [deleted file]
crunch_scripts/collection-merge [deleted file]
crunch_scripts/crunchrunner [deleted file]
crunch_scripts/crunchutil/__init__.py [deleted file]
crunch_scripts/crunchutil/robust_put.py [deleted file]
crunch_scripts/crunchutil/subst.py [deleted file]
crunch_scripts/crunchutil/vwd.py [deleted file]
crunch_scripts/cwl-runner [deleted file]
crunch_scripts/decompress-all.py [deleted file]
crunch_scripts/file-select [deleted file]
crunch_scripts/grep [deleted file]
crunch_scripts/hash [deleted file]
crunch_scripts/pgp-survey-import [deleted file]
crunch_scripts/pgp-survey-parse [deleted file]
crunch_scripts/picard-gatk2-prep [deleted file]
crunch_scripts/pyrtg.py [deleted file]
crunch_scripts/rtg-fasta2sdf [deleted file]
crunch_scripts/rtg-fastq2sdf [deleted file]
crunch_scripts/rtg-map [deleted file]
crunch_scripts/rtg-snp [deleted file]
crunch_scripts/run-command [deleted file]
crunch_scripts/split-fastq.py [deleted file]
crunch_scripts/test/task_output_dir [deleted file]
doc/_config.yml
doc/_includes/_arv_run_redirection.liquid [deleted file]
doc/admin/upgrading.html.textile.liquid
doc/api/crunch-scripts.html.textile.liquid
doc/api/execution.html.textile.liquid
doc/api/methods/humans.html.textile.liquid
doc/api/methods/job_tasks.html.textile.liquid
doc/api/methods/jobs.html.textile.liquid
doc/api/methods/pipeline_instances.html.textile.liquid
doc/api/methods/pipeline_templates.html.textile.liquid
doc/api/methods/specimens.html.textile.liquid
doc/api/methods/traits.html.textile.liquid
doc/install/install-compute-node.html.textile.liquid [deleted file]
doc/install/install-crunch-dispatch.html.textile.liquid [deleted file]
doc/install/install-keep-web.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/sdk/python/crunch-utility-libraries.html.textile.liquid [deleted file]
doc/user/cwl/cwl-run-options.html.textile.liquid
doc/user/cwl/cwl-versions.html.textile.liquid
doc/user/examples/crunch-examples.html.textile.liquid [deleted file]
doc/user/reference/job-pipeline-ref.html.textile.liquid [deleted file]
doc/user/topics/arv-run.html.textile.liquid [deleted file]
doc/user/topics/crunch-tools-overview.html.textile.liquid [deleted file]
doc/user/topics/run-command.html.textile.liquid [deleted file]
doc/user/topics/running-pipeline-command-line.html.textile.liquid [deleted file]
doc/user/topics/tutorial-parallel.html.textile.liquid [deleted file]
doc/user/topics/tutorial-trait-search.html.textile.liquid [deleted file]
doc/user/tutorials/running-external-program.html.textile.liquid [deleted file]
doc/user/tutorials/tutorial-firstscript.html.textile.liquid [deleted file]
doc/user/tutorials/tutorial-submit-job.html.textile.liquid [deleted file]
lib/cli/external.go
lib/config/config.default.yml
lib/config/deprecated.go
lib/config/deprecated_test.go
lib/config/export.go
lib/config/generated_config.go
lib/config/load.go
lib/controller/fed_collections.go
lib/controller/federation/conn.go
lib/controller/federation_test.go
sdk/cli/arvados-cli.gemspec
sdk/cli/bin/arv
sdk/cli/bin/arv-crunch-job [deleted file]
sdk/cli/bin/arv-run-pipeline-instance [deleted file]
sdk/cli/bin/crunch-job [deleted file]
sdk/cli/test/test_arv-run-pipeline-instance.rb [deleted file]
sdk/cli/test/test_crunch-job.rb [deleted file]
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvjob.py [deleted file]
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/crunch_script.py [deleted file]
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_job.py [deleted file]
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/wf/runin-reqs-wf.cwl
sdk/cwl/tests/wf/runin-reqs-wf2.cwl
sdk/cwl/tests/wf/runin-reqs-wf3.cwl
sdk/cwl/tests/wf/runin-reqs-wf4.cwl
sdk/go/arvados/config.go
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/crunchrunner/crunchrunner.go [deleted file]
sdk/go/crunchrunner/crunchrunner_test.go [deleted file]
sdk/go/crunchrunner/upload.go [deleted file]
sdk/go/crunchrunner/upload_test.go [deleted file]
sdk/python/arvados/commands/run.py
sdk/python/bin/arv-run [deleted file]
sdk/python/setup.py
sdk/python/tests/nginx.conf
sdk/python/tests/run_test_server.py
sdk/python/tests/test_arv_run.py [deleted file]
sdk/python/tests/test_pipeline_template.py [deleted file]
sdk/python/tests/test_retry_job_helpers.py
services/api/app/controllers/arvados/v1/job_tasks_controller.rb
services/api/app/controllers/arvados/v1/jobs_controller.rb
services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb
services/api/app/controllers/arvados/v1/pipeline_templates_controller.rb
services/api/app/helpers/commit_ancestors_helper.rb [deleted file]
services/api/app/helpers/commits_helper.rb
services/api/app/models/commit.rb [deleted file]
services/api/app/models/commit_ancestor.rb [deleted file]
services/api/app/models/job.rb
services/api/app/models/job_task.rb
services/api/app/models/pipeline_instance.rb
services/api/app/models/pipeline_template.rb
services/api/config/arvados_config.rb
services/api/db/migrate/20190808145904_drop_commit_ancestors.rb [new file with mode: 0644]
services/api/db/migrate/20190809135453_remove_commits_table.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/fpm-info.sh
services/api/lib/can_be_an_owner.rb
services/api/lib/crunch_dispatch.rb [deleted file]
services/api/lib/enable_jobs_api.rb
services/api/script/crunch-dispatch.rb [deleted file]
services/api/script/crunch_failure_report.py [deleted file]
services/api/script/fail-jobs.rb [deleted file]
services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
services/api/test/functional/arvados/v1/jobs_controller_test.rb
services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb
services/api/test/helpers/git_test_helper.rb
services/api/test/integration/crunch_dispatch_test.rb [deleted file]
services/api/test/integration/jobs_api_test.rb
services/api/test/integration/pipeline_test.rb
services/api/test/integration/serialized_encoding_test.rb
services/api/test/unit/commit_test.rb
services/api/test/unit/crunch_dispatch_test.rb [deleted file]
services/api/test/unit/fail_jobs_test.rb [deleted file]
services/api/test/unit/job_task_test.rb
services/api/test/unit/job_test.rb
services/api/test/unit/pipeline_instance_test.rb
services/fuse/arvados_fuse/__init__.py
services/keep-web/cache.go
services/keep-web/cache_test.go
services/keep-web/cadaver_test.go
services/keep-web/doc.go
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keep-web/keep-web.service
services/keep-web/main.go
services/keep-web/server.go
services/keep-web/server_test.go
services/keep-web/status_test.go
services/keep-web/usage.go [deleted file]
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/cluster-config.sh
tools/arvbox/lib/arvbox/docker/crunch-setup.sh [deleted file]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub [deleted file]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run [deleted symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run [deleted symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service [deleted file]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub [deleted file]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run [deleted symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run [deleted symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service [deleted file]
tools/arvbox/lib/arvbox/docker/service/keep-web/run-service
tools/arvbox/lib/arvbox/docker/service/keepproxy/run-service
tools/arvbox/lib/arvbox/docker/service/nginx/run
tools/crunchstat-summary/crunchstat_summary/command.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz [deleted file]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz.report [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report with 100% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz with 66% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz.report [moved from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report with 100% similarity]
tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/test_examples.py

index 48913a14d7fd1929b1ada72fb94cba1ad78be7bf..22ec1ba14c6dad9a83cc3594aa0353f835331e91 100644 (file)
@@ -4,9 +4,9 @@
 
 case "$TARGET" in
     centos*)
-        fpm_depends+=(git arvados-server)
+        fpm_depends+=(git)
         ;;
     debian* | ubuntu*)
-        fpm_depends+=(git g++ arvados-server)
+        fpm_depends+=(git g++)
         ;;
 esac
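
For context (not part of this commit's diff): fpm-info.sh fragments like the one above are sourced by the package build scripts, and each fpm_depends entry is assumed here to end up as an fpm --depends flag. A minimal sketch of that consumption; the loop and variable names are illustrative, not the exact run-library.sh code:

    #!/bin/bash
    # Illustrative consumption of an fpm-info.sh fragment; run from the repo root.
    TARGET=debian9          # set by the build scripts; debian/ubuntu now pulls in git and g++ only
    fpm_depends=()
    source apps/workbench/fpm-info.sh

    fpm_args=()
    for dep in "${fpm_depends[@]}"; do
        fpm_args+=(--depends "$dep")
    done
    echo "extra fpm args: ${fpm_args[*]}"   # e.g. --depends git --depends g++
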
index c76244d11cd1538e1584701ca4894a7318ef87c4..4067834efa1da88bdd2eab550521b0327fc3d45c 100644 (file)
@@ -28,13 +28,6 @@ class PipelineInstancesControllerTest < ActionController::TestCase
     PipelineInstance.where(uuid: pi_uuid).first.destroy
   end
 
-  test "pipeline instance components populated after create" do
-    create_instance_long_enough_to do |new_instance_uuid, template_fixture|
-      assert_equal(template_fixture['components'].to_json,
-                   assigns(:object).components.to_json)
-    end
-  end
-
   test "can render pipeline instance with tagged collections" do
     # Make sure to pass in a tagged collection to test that part of the rendering behavior.
     get(:show,
@@ -43,38 +36,6 @@ class PipelineInstancesControllerTest < ActionController::TestCase
     assert_response :success
   end
 
-  test "update script_parameters one at a time using merge param" do
-      template_fixture = api_fixture('pipeline_templates')['two_part']
-      post :update, params: {
-        id: api_fixture("pipeline_instances")["pipeline_to_merge_params"]["uuid"],
-        pipeline_instance: {
-          components: {
-            "part-two" => {
-              script_parameters: {
-                integer_with_value: {
-                  value: 9
-                },
-                plain_string: {
-                  value: 'quux'
-                },
-              }
-            }
-          }
-        },
-        merge: true,
-        format: :json
-      }, session: session_for(:active)
-      assert_response :success
-      assert_not_nil assigns(:object)
-      orig_params = template_fixture['components']['part-two']['script_parameters']
-      new_params = assigns(:object).components[:'part-two'][:script_parameters]
-      orig_params.keys.each do |k|
-        unless %w(integer_with_value plain_string).index(k)
-          assert_equal orig_params[k].to_json, new_params[k.to_sym].to_json
-        end
-      end
-  end
-
   test "component rendering copes with unexpected components format" do
     get(:show,
         params: {id: api_fixture("pipeline_instances")["components_is_jobspec"]["uuid"]},
@@ -101,70 +62,6 @@ class PipelineInstancesControllerTest < ActionController::TestCase
   # when the template has components that do not exist in the
   # instance (ticket #4000).
 
-  test "copy pipeline instance with components=use_latest" do
-    post(:copy,
-         params: {
-           id: api_fixture('pipeline_instances')['pipeline_with_newer_template']['uuid'],
-           components: 'use_latest',
-           script: 'use_latest',
-           pipeline_instance: {
-             state: 'RunningOnServer'
-           }
-         },
-         session: session_for(:active))
-    assert_response 302
-    assert_not_nil assigns(:object)
-
-    # Component 'foo' has script parameters only in the pipeline instance.
-    # Component 'bar' is present only in the pipeline_template.
-    # Test that the copied pipeline instance includes parameters for
-    # component 'foo' from the source instance, and parameters for
-    # component 'bar' from the source template.
-    #
-    assert_not_nil assigns(:object).components[:foo]
-    foo = assigns(:object).components[:foo]
-    assert_not_nil foo[:script_parameters]
-    assert_not_nil foo[:script_parameters][:input]
-    assert_equal 'foo instance input', foo[:script_parameters][:input][:title]
-
-    assert_not_nil assigns(:object).components[:bar]
-    bar = assigns(:object).components[:bar]
-    assert_not_nil bar[:script_parameters]
-    assert_not_nil bar[:script_parameters][:input]
-    assert_equal 'bar template input', bar[:script_parameters][:input][:title]
-  end
-
-  test "copy pipeline instance on newer template works with script=use_same" do
-    post(:copy,
-         params: {
-           id: api_fixture('pipeline_instances')['pipeline_with_newer_template']['uuid'],
-           components: 'use_latest',
-           script: 'use_same',
-           pipeline_instance: {
-             state: 'RunningOnServer'
-           }
-         },
-         session: session_for(:active))
-    assert_response 302
-    assert_not_nil assigns(:object)
-
-    # Test that relevant component parameters were copied from both
-    # the source instance and source template, respectively (see
-    # previous test)
-    #
-    assert_not_nil assigns(:object).components[:foo]
-    foo = assigns(:object).components[:foo]
-    assert_not_nil foo[:script_parameters]
-    assert_not_nil foo[:script_parameters][:input]
-    assert_equal 'foo instance input', foo[:script_parameters][:input][:title]
-
-    assert_not_nil assigns(:object).components[:bar]
-    bar = assigns(:object).components[:bar]
-    assert_not_nil bar[:script_parameters]
-    assert_not_nil bar[:script_parameters][:input]
-    assert_equal 'bar template input', bar[:script_parameters][:input][:title]
-  end
-
   test "generate graph" do
 
     use_token 'admin'
index 750c779ae7ca2d537911feabfa25484b85338443..0f79168901364f72c232c85cd981eb0540efd084 100644 (file)
@@ -143,32 +143,6 @@ class ProjectsControllerTest < ActionController::TestCase
     assert_equal api_fixture('users', 'subproject_admin')['uuid'], new_specimen.owner_uuid
   end
 
-  # An object which does not offer an expired_at field but has a xx_owner_uuid_name_unique constraint
-  # will be renamed when removed and another object with the same name exists in user's home project.
-  [
-    ['pipeline_templates', 'template_in_asubproject_with_same_name_as_one_in_active_user_home'],
-  ].each do |dm, fixture|
-    test "removing #{dm} from a subproject results in renaming it when there is another such object with same name in home project" do
-      object = api_fixture(dm, fixture)
-      delete(:remove_item,
-             params: { id: api_fixture('groups', 'asubproject')['uuid'],
-               item_uuid: object['uuid'],
-               format: 'js' },
-             session: session_for(:active))
-      assert_response :success
-      assert_match(/\b#{object['uuid']}\b/, @response.body,
-                   "removed object not named in response")
-      use_token :active
-      if dm.eql?('groups')
-        found = Group.find(object['uuid'])
-      else
-        found = PipelineTemplate.find(object['uuid'])
-      end
-      assert_equal api_fixture('users', 'active')['uuid'], found.owner_uuid
-      assert_equal true, found.name.include?(object['name'] + ' removed from ')
-    end
-  end
-
   test 'projects#show tab infinite scroll partial obeys limit' do
     get_contents_rows(limit: 1, filters: [['uuid','is_a',['arvados#job']]])
     assert_response :success
index 419de8c5e41287ee36370852a02a57c97512dc2f..a8e3653a523e4c67286d7378e7da191a7cf34db6 100644 (file)
@@ -26,7 +26,6 @@ module RepositoryStubHelper
       100644 blob 9bef02bbfda670595750fd99a4461005ce5b8f12     695    apps/workbench/.gitignore
       100644 blob b51f674d90f68bfb50d9304068f915e42b04aea4    2249    apps/workbench/Gemfile
       100644 blob b51f674d90f68bfb50d9304068f915e42b04aea4    2249    apps/workbench/Gemfile
-      100755 blob cdd5ebaff27781f93ab85e484410c0ce9e97770f    1012    crunch_scripts/hash
     EOS
     Repository.any_instance.
       stubs(:cat_file).with(fakesha1, fakefilename).returns fakefile
index 505767814a3c4612e95578886121bdf5d2c7a457..dc958d3b5e23295bd2013de9c0a4af9db419b0d0 100644 (file)
@@ -129,6 +129,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
   ].each do |token, user, invited, has_profile|
 
     test "visit home page for user #{token}" do
+      Rails.configuration.Users.AnonymousUserToken = ""
       if !token
         visit ('/')
       else
@@ -237,6 +238,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
 
   test "no SSH public key notification when shell_in_a_box_url is configured" do
     Rails.configuration.Services.WebShell.ExternalURL = URI('http://example.com')
+    Rails.configuration.Users.AnonymousUserToken = ""
     visit page_with_token('job_reader')
     click_link 'notifications-menu'
     assert_no_selector 'a', text:'Click here to set up an SSH public key for use with Arvados.'
index a66dfd80301c0ca7cbcf35c4e4075b38da456a38..7b510f2be9347b0a5f5b121b8d984f8ccf41983b 100644 (file)
@@ -20,105 +20,12 @@ class JobsTest < ActionDispatch::IntegrationTest
     StringIO.new content, 'r'
   end
 
-  test "add job description" do
-    job = api_fixture('jobs')['nearly_finished_job']
-    visit page_with_token("active", "/jobs/#{job['uuid']}")
-
-    # edit job description
-    within('.arv-description-as-subtitle') do
-      find('.fa-pencil').click
-      find('.editable-input textarea').set('*Textile description for job* - "Go to dashboard":/')
-      find('.editable-submit').click
-    end
-
-    # Verify edited description
-    assert_no_text '*Textile description for job*'
-    assert_text 'Textile description for job'
-    assert_selector 'a[href="/"]', text: 'Go to dashboard'
-  end
-
-  test 'view partial job log' do
-    need_selenium 'to be able to see the CORS response headers (PhantomJS 1.9.8 does not)'
-
-    # This config will be restored during teardown by ../test_helper.rb:
-    Rails.configuration.Workbench.LogViewerMaxBytes = 100
-
-    logdata = fakepipe_with_log_data.read
-    job_uuid = api_fixture('jobs')['running']['uuid']
-    logcollection = upload_data_and_get_collection(logdata, 'active', "#{job_uuid}.log.txt")
-    job = nil
-    use_token 'active' do
-      job = Job.find job_uuid
-      job.update_attributes log: logcollection.portable_data_hash
-    end
-    visit page_with_token 'active', '/jobs/'+job.uuid
-    find('a[href="#Log"]').click
-    wait_for_ajax
-    assert_text 'Showing only 100 bytes of this log'
-  end
-
-  test 'view log via keep-web redirect' do
-    token = api_token('active')
-    logdata = fakepipe_with_log_data.read
-    logblock = `echo -n #{logdata.shellescape} | ARVADOS_API_TOKEN=#{token.shellescape} arv-put --no-progress --raw -`.strip
-    assert $?.success?, $?
-
-    job = nil
-    use_token 'active' do
-      job = Job.find api_fixture('jobs')['running']['uuid']
-      mtxt = ". #{logblock} 0:#{logdata.length}:#{job.uuid}.log.txt\n"
-      logcollection = Collection.create(manifest_text: mtxt)
-      job.update_attributes log: logcollection.portable_data_hash
-    end
-    visit page_with_token 'active', '/jobs/'+job.uuid
-    find('a[href="#Log"]').click
-    assert_text 'log message 1'
-  end
-
-  [
-    ['foobar', false, false],
-    ['job_with_latest_version', true, false],
-    ['job_with_latest_version', true, true],
-  ].each do |job_name, expect_options, use_latest|
-    test "Rerun #{job_name} job, expect options #{expect_options},
-          and use latest version option #{use_latest}" do
-      job = api_fixture('jobs')[job_name]
-      visit page_with_token 'active', '/jobs/'+job['uuid']
-
-      if expect_options
-        assert_text 'supplied_script_version: master'
-      else
-        assert_no_text 'supplied_script_version'
-      end
-
-      assert_triggers_dom_event 'shown.bs.modal' do
-        find('a,button', text: 'Re-run job...').click
-      end
-      within('.modal-dialog') do
-        assert_selector 'a,button', text: 'Cancel'
-        if use_latest
-          page.choose("job_script_version_#{job['supplied_script_version']}")
-        end
-        click_on "Run now"
-      end
-
-      # Re-running jobs doesn't currently work because the test API
-      # server has no git repository to check against.  For now, check
-      # that the error message says something appropriate for that
-      # situation.
-      if expect_options && use_latest
-        assert_text "077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      else
-        assert_text "Script version #{job['script_version']} does not resolve to a commit"
-      end
-    end
-  end
-
   [
     ['active', true],
     ['job_reader2', false],
   ].each do |user, readable|
     test "view job with components as #{user} user" do
+      Rails.configuration.Users.AnonymousUserToken = ""
       job = api_fixture('jobs')['running_job_with_components']
       component1 = api_fixture('jobs')['completed_job_in_publicly_accessible_project']
       component2 = api_fixture('pipeline_instances')['running_pipeline_with_complete_job']
index adfd62bd8e04c73767ed2756a442ee15442691ec..b6e86d8ae5556d98b8f9d6f2fd8d5b8114035c1f 100644 (file)
@@ -61,135 +61,6 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test 'Create and run a pipeline' do
-    visit page_with_token('active_trustedclient', '/pipeline_templates')
-    within('tr', text: 'Two Part Pipeline Template') do
-      find('a,button', text: 'Run').click
-    end
-
-    # project chooser
-    within('.modal-dialog') do #FIXME: source of 1 test error
-      find('.selectable', text: 'A Project').click
-      find('button', text: 'Choose').click
-    end
-
-    # This pipeline needs input. So, Run should be disabled
-    page.assert_selector 'a.disabled,button.disabled', text: 'Run'
-
-    instance_page = current_path
-
-    # Add this collection to the project
-    visit '/projects'
-    find("#projects-menu").click
-    find('.dropdown-menu a,button', text: 'A Project').click
-    find('.btn', text: 'Add data').click
-    find('.dropdown-menu a,button', text: 'Copy data from another project').click
-    within('.modal-dialog') do
-      wait_for_ajax
-      first('span', text: 'foo_tag').click
-      find('.btn', text: 'Copy').click
-    end
-    using_wait_time(Capybara.default_max_wait_time * 3) do
-      wait_for_ajax
-    end
-
-    click_link 'Pipelines and processes'
-    find('tr[data-kind="arvados#pipelineInstance"]', text: '(none)').
-      find('a', text: 'Show').
-      click
-
-    assert find('p', text: 'Provide a value')
-
-    find('div.form-group', text: 'Foo/bar pair').
-      find('.btn', text: 'Choose').
-      click
-
-    within('.modal-dialog') do
-      assert(has_text?("Foo/bar pair"),
-             "pipeline input picker missing name of input")
-      wait_for_ajax
-      first('span', text: 'foo_tag').click
-      find('button', text: 'OK').click
-    end
-    wait_for_ajax
-
-    # The input, after being specified, should still be displayed (#3382)
-    assert find('div.form-group', text: 'Foo/bar pair')
-
-    # The input, after being specified, should still be editable (#3382)
-    find('div.form-group', text: 'Foo/bar pair').
-      find('.btn', text: 'Choose').click
-
-    within('.modal-dialog') do
-      assert(has_text?("Foo/bar pair"),
-             "pipeline input picker missing name of input")
-      wait_for_ajax
-      first('span', text: 'foo_tag').click
-      find('button', text: 'OK').click
-    end
-
-    # For good measure, check one last time that the input, after being specified twice, is still be displayed (#3382)
-    assert find('div.form-group', text: 'Foo/bar pair')
-
-    # Ensure that the collection's portable_data_hash, uuid and name
-    # are saved in the desired places. (#4015)
-
-    # foo_collection_in_aproject is the collection tagged with foo_tag.
-    collection = api_fixture('collections', 'foo_collection_in_aproject')
-    click_link 'Advanced'
-    click_link 'API response'
-    api_response = JSON.parse(find('div#advanced_api_response pre').text)
-    input_params = api_response['components']['part-one']['script_parameters']['input']
-    assert_equal collection['portable_data_hash'], input_params['value']
-    assert_equal collection['name'], input_params['selection_name']
-    assert_equal collection['uuid'], input_params['selection_uuid']
-
-    # "Run" button is now enabled
-    page.assert_no_selector 'a.disabled,button.disabled', text: 'Run'
-
-    first('a,button', text: 'Run').click
-
-    # Pipeline is running. We have a "Pause" button instead now.
-    page.assert_selector 'a,button', text: 'Pause'
-    find('a,button', text: 'Pause').click
-
-    # Pipeline is stopped. It should now be in paused state and Runnable again.
-    assert page.has_text? 'Paused'
-    page.assert_no_selector 'a.disabled,button.disabled', text: 'Resume'
-    page.assert_selector 'a,button', text: 'Re-run with latest'
-    page.assert_selector 'a,button', text: 'Re-run options'
-
-    # Since it is test env, no jobs are created to run. So, graph not visible
-    assert page.has_no_text? 'Graph'
-  end
-
-  # Create a pipeline instance from within a project and run
-  test 'Create pipeline inside a project and run' do
-    visit page_with_token('active_trustedclient', '/projects')
-
-    # Add collection to the project using Add data button
-    find("#projects-menu").click
-    find('.dropdown-menu a,button', text: 'A Project').click
-    find('.btn', text: 'Add data').click
-    find('.dropdown-menu a,button', text: 'Copy data from another project').click
-    within('.modal-dialog') do
-      wait_for_ajax
-      first('span', text: 'foo_tag').click
-      find('.btn', text: 'Copy').click
-    end
-    using_wait_time(Capybara.default_max_wait_time * 3) do
-      wait_for_ajax
-    end
-
-    create_and_run_pipeline_in_aproject true, 'Two Part Pipeline Template', 'foo_collection_in_aproject', false
-  end
-
-  # Create a pipeline instance from outside of a project
-  test 'Run a pipeline from dashboard' do
-    visit page_with_token('active_trustedclient')
-    create_and_run_pipeline_in_aproject false, 'Two Part Pipeline Template', 'foo_collection_in_aproject', false
-  end
-
   test 'view pipeline with job and see graph' do
     visit page_with_token('active_trustedclient', '/pipeline_instances')
     assert page.has_text? 'pipeline_with_job'
@@ -202,24 +73,6 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
     page.assert_selector "#provenance_graph"
   end
 
-  test 'pipeline description' do
-    visit page_with_token('active_trustedclient', '/pipeline_instances')
-    assert page.has_text? 'pipeline_with_job'
-
-    find('a', text: 'pipeline_with_job').click
-
-    within('.arv-description-as-subtitle') do
-      find('.fa-pencil').click
-      find('.editable-input textarea').set('*Textile description for pipeline instance*')
-      find('.editable-submit').click
-    end
-    wait_for_ajax
-
-    # verify description
-    assert page.has_no_text? '*Textile description for pipeline instance*'
-    assert page.has_text? 'Textile description for pipeline instance'
-  end
-
   test "JSON popup available for strange components" do
     uuid = api_fixture("pipeline_instances")["components_is_jobspec"]["uuid"]
     visit page_with_token("active", "/pipeline_instances/#{uuid}")
@@ -246,228 +99,6 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
            "did not land on pipeline instance page")
   end
 
-  PROJECT_WITH_SEARCH_COLLECTION = "A Subproject"
-  def check_parameter_search(proj_name)
-    create_pipeline_from("parameter_with_search", proj_name)
-    search_text = api_fixture("pipeline_templates", "parameter_with_search",
-                              "components", "with-search",
-                              "script_parameters", "input", "search_for")
-    first("a.btn,button", text: "Choose").click
-    within(".modal-body") do
-      if (proj_name != PROJECT_WITH_SEARCH_COLLECTION)
-        # Switch finder modal to Subproject to find the Collection.
-        click_on proj_name
-        click_on PROJECT_WITH_SEARCH_COLLECTION
-      end
-      assert_equal(search_text, first("input").value,
-                   "parameter search not preseeded")
-      assert(has_text?(api_fixture("collections")["baz_collection_name_in_asubproject"]["name"]),
-             "baz Collection not in preseeded search results")
-    end
-  end
-
-  test "Workbench respects search_for parameter in templates" do
-    check_parameter_search(PROJECT_WITH_SEARCH_COLLECTION)
-  end
-
-  test "Workbench preserves search_for parameter after project switch" do
-    check_parameter_search("A Project")
-  end
-
-  test "enter a float for a number pipeline input" do
-    # Poltergeist either does not support the HTML 5 <input
-    # type="number">, or interferes with the associated X-Editable
-    # validation code.  If the input field has type=number (forcing an
-    # integer), this test will yield a false positive under
-    # Poltergeist.  --Brett, 2015-02-05
-    need_selenium "for strict X-Editable input validation"
-    create_pipeline_from("template_with_dataclass_number")
-    INPUT_SELECTOR =
-      ".editable[data-name='[components][work][script_parameters][input][value]']"
-    find(INPUT_SELECTOR).click
-    find(".editable-input input").set("12.34")
-    find("#editable-submit").click
-    assert_no_selector(".editable-popup")
-    assert_selector(INPUT_SELECTOR, text: "12.34")
-  end
-
-  [
-    [true, 'Two Part Pipeline Template', 'foo_collection_in_aproject', false],
-    [false, 'Two Part Pipeline Template', 'foo_collection_in_aproject', false],
-    [true, 'Two Part Template with dataclass File', 'foo_collection_in_aproject', true],
-    [false, 'Two Part Template with dataclass File', 'foo_collection_in_aproject', true],
-    [true, 'Two Part Pipeline Template', 'collection_with_no_name_in_aproject', false],
-  ].each do |in_aproject, template_name, collection, choose_file|
-    test "Run pipeline instance in #{in_aproject} with #{template_name} with #{collection} file #{choose_file}" do
-      if in_aproject
-        visit page_with_token 'active', \
-        '/projects/'+api_fixture('groups')['aproject']['uuid']
-      else
-        visit page_with_token 'active', '/'
-      end
-
-      # need bigger modal size when choosing a file from collection
-      if Capybara.current_driver == :selenium
-        Capybara.current_session.driver.browser.manage.window.resize_to(1200, 800)
-      end
-
-      create_and_run_pipeline_in_aproject in_aproject, template_name, collection, choose_file
-      instance_path = current_path
-
-      # Pause the pipeline
-      find('a,button', text: 'Pause').click
-      assert page.has_text? 'Paused'
-      page.assert_no_selector 'a.disabled,button.disabled', text: 'Resume'
-      page.assert_selector 'a,button', text: 'Re-run with latest'
-      page.assert_selector 'a,button', text: 'Re-run options'
-
-      # Verify that the newly created instance is created in the right project.
-      assert page.has_text? 'Home'
-      if in_aproject
-        assert page.has_text? 'A Project'
-      else
-        assert page.has_no_text? 'A Project'
-      end
-    end
-  end
-
-  [
-    ['active', false, false, false],
-    ['active', false, false, true],
-    ['active', true, false, false],
-    ['active', true, true, false],
-    ['active', true, false, true],
-    ['active', true, true, true],
-    ['project_viewer', false, false, true],
-    ['project_viewer', true, true, true],
-  ].each do |user, with_options, choose_options, in_aproject|
-    test "Rerun pipeline instance as #{user} using options #{with_options} #{choose_options} in #{in_aproject}" do
-      if in_aproject
-        path = '/pipeline_instances/'+api_fixture('pipeline_instances')['pipeline_owned_by_active_in_aproject']['uuid']
-      else
-        path = '/pipeline_instances/'+api_fixture('pipeline_instances')['pipeline_owned_by_active_in_home']['uuid']
-      end
-
-      visit page_with_token(user, path)
-
-      page.assert_selector 'a,button', text: 'Re-run with latest'
-      page.assert_selector 'a,button', text: 'Re-run options'
-
-      if user == 'project_viewer' && in_aproject
-        assert page.has_text? 'A Project'
-      end
-
-      # Now re-run the pipeline
-      if with_options
-        assert_triggers_dom_event 'shown.bs.modal' do
-          find('a,button', text: 'Re-run options').click
-        end
-        within('.modal-dialog') do
-          page.assert_selector 'a,button', text: 'Copy and edit inputs'
-          page.assert_selector 'a,button', text: 'Run now'
-          if choose_options
-            find('button', text: 'Copy and edit inputs').click
-          else
-            find('button', text: 'Run now').click
-          end
-        end
-      else
-        find('a,button', text: 'Re-run with latest').click
-      end
-
-      # Verify that the newly created instance is created in the right
-      # project. In case of project_viewer user, since the user cannot
-      # write to the project, the pipeline should have been created in
-      # the user's Home project.
-      assert_not_equal path, current_path, 'Rerun instance path expected to be different'
-      assert_text 'Home'
-      if in_aproject && (user != 'project_viewer')
-        assert_text 'A Project'
-      else
-        assert_no_text 'A Project'
-      end
-    end
-  end
-
-  # Create and run a pipeline for 'Two Part Pipeline Template' in 'A Project'
-  def create_and_run_pipeline_in_aproject in_aproject, template_name, collection_fixture, choose_file=false
-    # collection in aproject to be used as input
-    collection = api_fixture('collections', collection_fixture)
-    collection['name'] ||= '' # API response is "" even if fixture attr is null
-
-    # create a pipeline instance
-    find('.btn', text: 'Run a process').click
-    within('.modal-dialog') do
-      find('.selectable', text: template_name).click
-      find('.btn', text: 'Next: choose inputs').click
-    end
-
-    assert find('p', text: 'Provide a value')
-
-    find('div.form-group', text: 'Foo/bar pair').
-      find('.btn', text: 'Choose').
-      click
-
-    within('.modal-dialog') do
-      if in_aproject
-        assert_selector 'button.dropdown-toggle', text: 'A Project'
-        wait_for_ajax
-      else
-        assert_selector 'button.dropdown-toggle', text: 'Home'
-        wait_for_ajax
-        click_button "Home"
-        click_link "A Project"
-        wait_for_ajax
-      end
-
-      if collection_fixture == 'foo_collection_in_aproject'
-        first('span', text: 'foo_tag').click
-      elsif collection['name'] != ''
-        first('span', text: "#{collection['name']}").click
-      else
-        collection_uuid = collection['uuid']
-        find("div[data-object-uuid=#{collection_uuid}]").click
-      end
-
-      if choose_file
-        wait_for_ajax
-        find('.preview-selectable', text: 'foo').click
-      end
-      find('button', text: 'OK').click
-    end
-
-    # The input, after being specified, should still be displayed (#3382)
-    assert find('div.form-group', text: 'Foo/bar pair')
-
-    # Ensure that the collection's portable_data_hash, uuid and name
-    # are saved in the desired places. (#4015)
-    click_link 'Advanced'
-    click_link 'API response'
-
-    api_response = JSON.parse(find('div#advanced_api_response pre').text)
-    input_params = api_response['components']['part-one']['script_parameters']['input']
-    assert_equal(collection['uuid'], input_params['selection_uuid'], "Not found expected input param uuid")
-    if choose_file
-      assert_equal(collection['portable_data_hash']+'/foo', input_params['value'], "Not found expected input file param value")
-      assert_equal(collection['name']+'/foo', input_params['selection_name'], "Not found expected input file param name")
-    else
-      assert_equal(collection['portable_data_hash'], input_params['value'], "Not found expected input param value")
-      assert_equal(collection['name'], input_params['selection_name'], "Not found expected input selection name")
-    end
-
-    # "Run" button present and enabled
-    page.assert_no_selector 'a.disabled,button.disabled', text: 'Run'
-    first('a,button', text: 'Run').click
-
-    # Pipeline is running. We have a "Pause" button instead now.
-    page.assert_no_selector 'a,button', text: 'Run'
-    page.assert_no_selector 'a.disabled,button.disabled', text: 'Resume'
-    page.assert_selector 'a,button', text: 'Pause'
-
-    # Since it is test env, no jobs are created to run. So, graph not visible
-    assert page.has_no_text? 'Graph'
-  end
-
   [
     ['user1_with_load', 'zzzzz-d1hrv-10pipelines0001', 0], # run time 0 minutes
     ['user1_with_load', 'zzzzz-d1hrv-10pipelines0010', 17*60*60 + 51*60], # run time 17 hours and 51 minutes
index ad14df18a0009c7449707f4d135ca9bbb5b46a5a..1fc4427a38ad9ea67ad364d27044dea65c07e63f 100644 (file)
@@ -17,33 +17,4 @@ class PipelineTemplatesTest < ActionDispatch::IntegrationTest
            "components JSON not found")
   end
 
-  test "pipeline template description" do
-    need_javascript
-    visit page_with_token("active", "/pipeline_templates")
-
-    # go to Two Part pipeline template
-    within first('tr', text: 'Two Part Pipeline Template') do
-      find(".fa-gears").click
-    end
-
-    # edit template description
-    within('.arv-description-as-subtitle') do
-      find('.fa-pencil').click
-      find('.editable-input textarea').set('*Textile description for pipeline template* - "Go to dashboard":/')
-      find('.editable-submit').click
-    end
-    wait_for_ajax
-
-    # Verfiy edited description
-    assert page.has_no_text? '*Textile description for pipeline template*'
-    assert page.has_text? 'Textile description for pipeline template'
-    assert page.has_link? 'Go to dashboard'
-    click_link 'Go to dashboard'
-    assert page.has_text? 'Recent pipelines and processes'
-
-    # again visit recent templates page and verify edited description
-    visit page_with_token("active", "/pipeline_templates")
-    assert page.has_no_text? '*Textile description for pipeline template*'
-    assert page.has_text? 'Textile description for pipeline template'
-  end
 end
index 279d851017f0a9270d970a39e5ce030659b30075..17ab5e4661db335f7f40129d2dac246f69990e67 100644 (file)
@@ -735,7 +735,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
   end
 
   [
-    ['Two Part Pipeline Template', 'part-one', 'Provide a value for the following'],
     ['Workflow with input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
   ].each do |template_name, preview_txt, process_txt|
     test "run a process using template #{template_name} in a project" do
index 056598ef11e1719cf92409947b12be34d6d0485a..1fd9d900845e9ee8cac28bfe71a3e4d5d5f66ec2 100644 (file)
@@ -16,29 +16,6 @@ class RepositoriesTest < ActionDispatch::IntegrationTest
     need_javascript
   end
 
-  test "browse repository from jobs#show" do
-    sha1 = api_fixture('jobs')['running']['script_version']
-    _, fakecommit, fakefile =
-      stub_repo_content sha1: sha1, filename: 'crunch_scripts/hash'
-    show_object_using 'active', 'jobs', 'running', sha1
-    click_on api_fixture('jobs')['running']['script']
-    assert_text fakefile
-    click_on 'crunch_scripts'
-    assert_selector 'td a', text: 'hash'
-    click_on 'foo'
-    assert_selector 'td a', text: 'crunch_scripts'
-    click_on sha1
-    assert_text fakecommit
-
-    show_object_using 'active', 'jobs', 'running', sha1
-    click_on 'active/foo'
-    assert_selector 'td a', text: 'crunch_scripts'
-
-    show_object_using 'active', 'jobs', 'running', sha1
-    click_on sha1
-    assert_text fakecommit
-  end
-
   test "browse using arv-git-http" do
     repo = api_fixture('repositories')['foo']
     Repository.any_instance.
index a79220a8870095cb70315f088dab5ca7bb5c4743..83494173a9cea146371a703aebf67ffdc77c10ef 100644 (file)
@@ -77,67 +77,6 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "pipeline instance arv-refresh-on-log-event" do
-    # Do something and check that the pane reloads.
-    p = use_token :active do
-      PipelineInstance.create(state: "RunningOnServer",
-                              components: {
-                                c1: {
-                                  script: "test_hash.py",
-                                  script_version: "1de84a854e2b440dc53bf42f8548afa4c17da332"
-                                }
-                              })
-    end
-    visit(page_with_token("active", "/pipeline_instances/#{p.uuid}"))
-
-    assert_text 'Active'
-    assert page.has_link? 'Pause'
-    assert_no_text 'Complete'
-    assert page.has_no_link? 'Re-run with latest'
-
-    use_token :dispatch1 do
-      p.update_attributes!(state: 'Complete')
-    end
-
-    assert_no_text 'Active'
-    assert page.has_no_link? 'Pause'
-    assert_text 'Complete'
-    assert page.has_link? 'Re-run with latest'
-  end
-
-  test "job arv-refresh-on-log-event" do
-    # Do something and check that the pane reloads.
-    uuid = api_fixture('jobs')['running_will_be_completed']['uuid']
-    visit(page_with_token("active", "/jobs/#{uuid}"))
-
-    assert_no_text 'complete'
-    assert_no_text 'Re-run job'
-
-    use_token :dispatch1 do
-      Job.find(uuid).update_attributes!(state: 'Complete')
-    end
-
-    assert_text 'complete'
-    assert_text 'Re-run job'
-  end
-
-  test "dashboard arv-refresh-on-log-event" do
-    visit(page_with_token("active", "/"))
-
-    assert_no_text 'test dashboard arv-refresh-on-log-event'
-
-    # Do something and check that the pane reloads.
-    use_token :active do
-      p = PipelineInstance.create({state: "RunningOnServer",
-                                    name: "test dashboard arv-refresh-on-log-event",
-                                    components: {
-                                    }
-                                  })
-    end
-
-    assert_text 'test dashboard arv-refresh-on-log-event'
-  end
-
   test 'job graph appears when first data point is already in logs table' do
     job_graph_first_datapoint_test
   end
index b58d59a82ae414012935543d21bf9ec211274f01..fe73f2734f3d9473a82c9ddb7e11498613ad645d 100644 (file)
@@ -78,8 +78,6 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
   end
 
   [
-    ['jobs', 'running_job_with_components', true, true],
-    ['pipeline_instances', 'components_is_jobspec', true, true],
     ['containers', 'running', false],
     ['container_requests', 'running', true],
   ].each do |type, fixture, cancelable, confirm_cancellation|
@@ -122,8 +120,6 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
   end
 
   [
-    ['jobs', 'running_job_with_components'],
-    ['pipeline_instances', 'has_component_with_completed_jobs'],
     ['container_requests', 'running'],
     ['container_requests', 'completed'],
   ].each do |type, fixture|
@@ -145,7 +141,6 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
   end
 
   [
-    ['Pipeline with default input specifications', 'part-one', 'Provide values for the following'],
     ['Workflow with default input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
   ].each do |template_name, preview_txt, process_txt|
     test "run a process using template #{template_name} from dashboard" do
@@ -248,31 +243,6 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ['jobs', 'active', 'running_job_with_components', 'component1', '/jobs/zzzzz-8i9sb-jyq01m7in1jlofj#Log'],
-    ['pipeline_instances', 'active', 'pipeline_in_running_state', 'foo', '/jobs/zzzzz-8i9sb-pshmckwoma9plh7#Log'],
-    ['pipeline_instances', nil, 'pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', 'foo', 'Log unavailable'],
-  ].each do |type, token, fixture, child, log_link|
-    test "link_to_log for #{fixture} for #{token}" do
-      obj = api_fixture(type)[fixture]
-      if token
-        visit page_with_token token, "/#{type}/#{obj['uuid']}"
-      else
-        Rails.configuration.Users.AnonymousUserToken =
-          api_fixture("api_client_authorizations", "anonymous", "api_token")
-        visit "/#{type}/#{obj['uuid']}"
-      end
-
-      click_link(child)
-
-      if token
-        assert_selector "a[href=\"#{log_link}\"]"
-      else
-        assert_text log_link
-      end
-    end
-  end
-
   test 'Run from workflows index page' do
     visit page_with_token('active', '/workflows')
 
index 9e18a7063fc3e164fd6752d07c34703fb64aa415..54e7c08197109ba1cf26f9d73ccb15caa3fcb194 100644 (file)
@@ -5,13 +5,7 @@
 require 'test_helper'
 
 class DisabledApiTest < ActiveSupport::TestCase
-  test 'Job.creatable? reflects whether jobs.create API is enabled' do
-    use_token(:active) do
-      assert(Job.creatable?)
-    end
-    dd = ArvadosApiClient.new_or_current.discovery.deep_dup
-    dd[:resources][:jobs][:methods].delete(:create)
-    ArvadosApiClient.any_instance.stubs(:discovery).returns(dd)
+  test 'Job.creatable? is false' do
     use_token(:active) do
       refute(Job.creatable?)
     end
index 610dac90fe89443ffe9a18f53d59b686429c7e3b..3d3ea6f0f82798953c2d3eac7a646fa1240735a9 100644 (file)
@@ -37,10 +37,6 @@ RUN rpm -ivh epel-release-latest-7.noarch.rpm
 
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
 
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
 # The version of setuptools that comes with CentOS is way too old
 RUN pip install --upgrade setuptools
 
index f8104472b6dd20e81988c3505646af5ae0f629df..c403d79bcc859a2925ce50752124d9aa1fad4068 100644 (file)
@@ -34,9 +34,5 @@ RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
 
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
 ENV WORKSPACE /arvados
 CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "debian9"]
index 63c119c8405f1dc38020db111656af3d12576310..90f340e66cb65155be2737b3a0dd9c6de174d30c 100644 (file)
@@ -33,9 +33,5 @@ RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
 
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
 ENV WORKSPACE /arvados
 CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1604"]
index 5e4d067671777843373536fe7b2ad42b98cf00e0..1adff74000ea208fe8ac6b66915b1f30f0f117e2 100644 (file)
@@ -33,9 +33,5 @@ RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
 
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
 ENV WORKSPACE /arvados
 CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1804"]
index 5c7dc342ee9c40b546ad38e377555193d6192975..378c9bbfa39dc9dfedc134636ce5806bf6d6cf65 100755 (executable)
@@ -261,7 +261,6 @@ else
     set +e
     mv -f ${WORKSPACE}/packages/${TARGET}/* ${WORKSPACE}/packages/${TARGET}/processed/ 2>/dev/null
     set -e
-set -x
     # Build packages.
     if docker run \
         --rm \
index ae677a12c16059fc4ca7796a69dbf6337856f09e..bebcae0653ef0c54ea37cd5b50c06f03c8590520 100755 (executable)
@@ -290,8 +290,6 @@ package_go_binary cmd/arvados-server arvados-controller \
     "Arvados cluster controller daemon"
 package_go_binary cmd/arvados-server arvados-dispatch-cloud \
     "Arvados cluster cloud dispatch"
-package_go_binary sdk/go/crunchrunner crunchrunner \
-    "Crunchrunner executes a command inside a container and uploads the output"
 package_go_binary services/arv-git-httpd arvados-git-httpd \
     "Provide authenticated http access to Arvados-hosted git repositories"
 package_go_binary services/crunch-dispatch-local crunch-dispatch-local \
@@ -358,13 +356,16 @@ mkdir cwltest/bin && touch cwltest/bin/cwltest
 fpm_build_virtualenv "cwltest" "cwltest"
 rm -rf "$WORKSPACE/cwltest"
 
+calculate_go_package_version arvados_server_version cmd/arvados-server
+arvados_server_iteration=$(default_iteration "arvados-server" "$arvados_server_version" "go")
+
 # Build the API server package
 test_rails_package_presence arvados-api-server "$WORKSPACE/services/api"
 if [[ "$?" == "0" ]]; then
   handle_rails_package arvados-api-server "$WORKSPACE/services/api" \
       "$WORKSPACE/agpl-3.0.txt" --url="https://arvados.org" \
       --description="Arvados API server - Arvados is a free and open source platform for big data science." \
-      --license="GNU Affero General Public License, version 3.0"
+      --license="GNU Affero General Public License, version 3.0" --depends "arvados-server = ${arvados_server_version}-${arvados_server_iteration}"
 fi
 
 # Build the workbench server package
@@ -372,6 +373,22 @@ test_rails_package_presence arvados-workbench "$WORKSPACE/apps/workbench"
 if [[ "$?" == "0" ]] ; then
   (
       set -e
+
+      # The workbench package has a build-time dependency on the arvados-server
+      # package for config manipulation, so install it first.
+      cd $WORKSPACE/cmd/arvados-server
+      get_complete_package_name arvados_server_pkgname arvados-server ${arvados_server_version} go
+
+      arvados_server_pkg_path="$WORKSPACE/packages/$TARGET/${arvados_server_pkgname}"
+      if [[ ! -e ${arvados_server_pkg_path} ]]; then
+        arvados_server_pkg_path="$WORKSPACE/packages/$TARGET/processed/${arvados_server_pkgname}"
+      fi
+      if [[ "$FORMAT" == "deb" ]]; then
+        dpkg -i ${arvados_server_pkg_path}
+      else
+        rpm -i ${arvados_server_pkg_path}
+      fi
+
       cd "$WORKSPACE/apps/workbench"
 
       # We need to bundle to be ready even when we build a package without vendor directory
@@ -390,8 +407,8 @@ if [[ "$?" == "0" ]] ; then
       mv /tmp/x /etc/arvados/config.yml
       perl -p -i -e 'BEGIN{undef $/;} s/WebDAV(.*?):\n( *)ExternalURL: ""/WebDAV$1:\n$2ExternalURL: "example.com"/g' /etc/arvados/config.yml
 
-      RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >/dev/null
-      RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >/dev/null
+      RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >"$STDOUT_IF_DEBUG"
+      RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >"$STDOUT_IF_DEBUG"
 
       # Remove generated configuration files so they don't go in the package.
       rm -rf /etc/arvados/
@@ -404,7 +421,7 @@ if [[ "$?" == "0" ]] ; then
     handle_rails_package arvados-workbench "$WORKSPACE/apps/workbench" \
         "$WORKSPACE/agpl-3.0.txt" --url="https://arvados.org" \
         --description="Arvados Workbench - Arvados is a free and open source platform for big data science." \
-        --license="GNU Affero General Public License, version 3.0"
+        --license="GNU Affero General Public License, version 3.0" --depends "arvados-server = ${arvados_server_version}-${arvados_server_iteration}"
   fi
 fi
 
index 962940b799bd1907cc0540dd60a0f96643ebeb18..d75e2785eca730d748f6e59446f6509a233fa1dd 100755 (executable)
@@ -14,6 +14,8 @@ Syntax:
 --upload
     If the build and test steps are successful, upload the packages
     to a remote apt repository (default: false)
+--debug
+    Output debug information (default: false)
 --rc
     Optional Parameter to build Release Candidate
 --build-version <version>
@@ -42,7 +44,7 @@ if ! [[ -d "$WORKSPACE" ]]; then
 fi
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,upload,rc,target:,build-version: \
+    help,debug,upload,rc,target:,build-version: \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -51,6 +53,7 @@ fi
 TARGET=debian9
 UPLOAD=0
 RC=0
+DEBUG=
 
 declare -a build_args=()
 
@@ -65,6 +68,9 @@ while [ $# -gt 0 ]; do
         --target)
             TARGET="$2"; shift
             ;;
+        --debug)
+            DEBUG=" --debug"
+            ;;
         --upload)
             UPLOAD=1
             ;;
@@ -99,7 +105,7 @@ COLUMNS=80
 title "Start build packages"
 timer_reset
 
-$WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}"
+$WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}"$DEBUG
 
 checkexit $? "build packages"
 title "End of build packages (`timer`)"
@@ -108,7 +114,7 @@ title "Start test packages"
 timer_reset
 
 if [ ${#failures[@]} -eq 0 ]; then
-  $WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}" --test-packages
+  $WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}" --test-packages$DEBUG
 else
   echo "Skipping package upload, there were errors building the packages"
 fi
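
The new --debug flag is simply forwarded to run-build-packages-one-target.sh; DEBUG is defined with a leading space, so appending it unquoted expands to either nothing or a single extra --debug word. Assuming this wrapper is build/run-build-test-packages-one-target.sh (per the changed-files list), an invocation might look like:

    ./run-build-test-packages-one-target.sh --target debian9 --debug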
index b4c2d1ddc8e840c3602b0ebc62c81c6f99dd83f8..cb5549df7183676217270adc03b98b1ddbb0cf25 100755 (executable)
@@ -103,6 +103,40 @@ handle_ruby_gem() {
     fi
 }
 
+calculate_go_package_version() {
+  # $__returnvar has the nameref attribute set, which means it is a reference
+  # to another variable that is passed in as the first argument to this function.
+  # see https://www.gnu.org/software/bash/manual/html_node/Shell-Parameters.html
+  local -n __returnvar="$1"; shift
+  local src_path="$1"; shift
+
+  mkdir -p "$GOPATH/src/git.curoverse.com"
+  ln -sfn "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
+  (cd "$GOPATH/src/git.curoverse.com/arvados.git" && "$GOPATH/bin/govendor" sync -v)
+
+  cd "$GOPATH/src/git.curoverse.com/arvados.git/$src_path"
+  local version="$(version_from_git)"
+  local timestamp="$(timestamp_from_git)"
+
+  # Update the version number and build a new package if the vendor
+  # bundle has changed, or the command imports anything from the
+  # Arvados SDK and the SDK has changed.
+  declare -a checkdirs=(vendor)
+  if grep -qr git.curoverse.com/arvados .; then
+      checkdirs+=(sdk/go lib)
+  fi
+  for dir in ${checkdirs[@]}; do
+      cd "$GOPATH/src/git.curoverse.com/arvados.git/$dir"
+      ts="$(timestamp_from_git)"
+      if [[ "$ts" -gt "$timestamp" ]]; then
+          version=$(version_from_git)
+          timestamp="$ts"
+      fi
+  done
+
+  __returnvar="$version"
+}
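
calculate_go_package_version hands its result back through a bash nameref rather than echoing it, so callers can capture the version without a subshell. A minimal standalone sketch of the pattern — bash 4.3+ is required for local -n, and the names below are illustrative only:

    get_version() {
      local -n out="$1"   # 'out' is now an alias for the caller's variable
      out="1.4.1"         # assigning to it assigns to that variable
    }
    get_version go_package_version
    echo "$go_package_version"   # prints 1.4.1

package_go_binary below uses it the same way: calculate_go_package_version go_package_version $src_path.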
+
 # Usage: package_go_binary services/foo arvados-foo "Compute foo to arbitrary precision"
 package_go_binary() {
     local src_path="$1"; shift
@@ -110,46 +144,27 @@ package_go_binary() {
     local description="$1"; shift
     local license_file="${1:-agpl-3.0.txt}"; shift
 
-    if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]] ; then
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]]; then
+      # arvados-workbench depends on arvados-server at build time, so even when
+      # only arvados-workbench is being built, we need to build arvados-server too
+      if [[ "$prog" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
         return 0
+      fi
     fi
 
     debug_echo "package_go_binary $src_path as $prog"
 
     local basename="${src_path##*/}"
-
-    mkdir -p "$GOPATH/src/git.curoverse.com"
-    ln -sfn "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
-    (cd "$GOPATH/src/git.curoverse.com/arvados.git" && "$GOPATH/bin/govendor" sync -v)
-
-    cd "$GOPATH/src/git.curoverse.com/arvados.git/$src_path"
-    local version="$(version_from_git)"
-    local timestamp="$(timestamp_from_git)"
-
-    # Update the version number and build a new package if the vendor
-    # bundle has changed, or the command imports anything from the
-    # Arvados SDK and the SDK has changed.
-    declare -a checkdirs=(vendor)
-    if grep -qr git.curoverse.com/arvados .; then
-        checkdirs+=(sdk/go lib)
-    fi
-    for dir in ${checkdirs[@]}; do
-        cd "$GOPATH/src/git.curoverse.com/arvados.git/$dir"
-        ts="$(timestamp_from_git)"
-        if [[ "$ts" -gt "$timestamp" ]]; then
-            version=$(version_from_git)
-            timestamp="$ts"
-        fi
-    done
+    calculate_go_package_version go_package_version $src_path
 
     cd $WORKSPACE/packages/$TARGET
-    test_package_presence $prog $version go
+    test_package_presence $prog $go_package_version go
 
     if [[ "$?" != "0" ]]; then
       return 1
     fi
 
-    go get -ldflags "-X main.version=${version}" "git.curoverse.com/arvados.git/$src_path"
+    go get -ldflags "-X main.version=${go_package_version}" "git.curoverse.com/arvados.git/$src_path"
 
     local -a switches=()
     systemd_unit="$WORKSPACE/${src_path}/${prog}.service"
@@ -161,7 +176,7 @@ package_go_binary() {
     fi
     switches+=("$WORKSPACE/${license_file}=/usr/share/doc/$prog/${license_file}")
 
-    fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" dir "${version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
+    fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" dir "${go_package_version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
 }
 
 default_iteration() {
@@ -213,47 +228,71 @@ test_rails_package_presence() {
   test_package_presence $pkgname $version rails "$RAILS_PACKAGE_ITERATION"
 }
 
-test_package_presence() {
-    local pkgname="$1"; shift
-    local version="$1"; shift
-    local pkgtype="$1"; shift
-    local iteration="$1"; shift
-    local arch="$1"; shift
+get_complete_package_name() {
+  # If the errexit flag is set, unset it until this function returns;
+  # otherwise, the shift calls below would abort the program when optional arguments are not supplied.
+  if [ -o errexit ]; then
+    set +e
+    trap 'set -e' RETURN
+  fi
+  # $__returnvar has the nameref attribute set, which means it is a reference
+  # to another variable that is passed in as the first argument to this function.
+  # see https://www.gnu.org/software/bash/manual/html_node/Shell-Parameters.html
+  local -n __returnvar="$1"; shift
+  local pkgname="$1"; shift
+  local version="$1"; shift
+  local pkgtype="$1"; shift
+  local iteration="$1"; shift
+  local arch="$1"; shift
+  if [[ "$iteration" == "" ]]; then
+      iteration="$(default_iteration "$pkgname" "$version" "$pkgtype")"
+  fi
 
-    if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
-        return 1
-    fi
+  if [[ "$arch" == "" ]]; then
+    rpm_architecture="x86_64"
+    deb_architecture="amd64"
 
-    if [[ "$iteration" == "" ]]; then
-        iteration="$(default_iteration "$pkgname" "$version" "$pkgtype")"
+    if [[ "$pkgtype" =~ ^(src)$ ]]; then
+      rpm_architecture="noarch"
+      deb_architecture="all"
     fi
 
-    if [[ "$arch" == "" ]]; then
+    # These python packages have binary components
+    if [[ "$pkgname" =~ (ruamel|ciso|pycrypto|pyyaml) ]]; then
       rpm_architecture="x86_64"
       deb_architecture="amd64"
+    fi
+  else
+    rpm_architecture=$arch
+    deb_architecture=$arch
+  fi
 
-      if [[ "$pkgtype" =~ ^(src)$ ]]; then
-        rpm_architecture="noarch"
-        deb_architecture="all"
-      fi
+  local complete_pkgname="${pkgname}_$version${iteration:+-$iteration}_$deb_architecture.deb"
+  if [[ "$FORMAT" == "rpm" ]]; then
+      # rpm packages get iteration 1 if we don't supply one
+      iteration=${iteration:-1}
+      complete_pkgname="$pkgname-$version-${iteration}.$rpm_architecture.rpm"
+  fi
+  __returnvar=${complete_pkgname}
+}
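
get_complete_package_name assembles the expected artifact filename into the named variable, switching naming conventions on $FORMAT. A hedged illustration of the strings it would produce — the version here is made up, and the iteration of 1 assumes default_iteration returns 1 for a go package:

    get_complete_package_name pkgfile arvados-server 1.4.1 go
    # FORMAT=deb -> pkgfile=arvados-server_1.4.1-1_amd64.deb
    # FORMAT=rpm -> pkgfile=arvados-server-1.4.1-1.x86_64.rpm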
 
-      # These python packages have binary components
-      if [[ "$pkgname" =~ (ruamel|ciso|pycrypto|pyyaml) ]]; then
-        rpm_architecture="x86_64"
-        deb_architecture="amd64"
+# Test whether the package already exists: return 0 if it does not (so it should be built), 1 if it does.
+test_package_presence() {
+    local pkgname="$1"; shift
+    local version="$1"; shift
+    local pkgtype="$1"; shift
+    local iteration="$1"; shift
+    local arch="$1"; shift
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
+      # arvados-workbench depends on arvados-server at build time, so even when
+      # only arvados-workbench is being built, we need to build arvados-server too
+      if [[ "$pkgname" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
+        return 1
       fi
-    else
-      rpm_architecture=$arch
-      deb_architecture=$arch
     fi
 
-    if [[ "$FORMAT" == "deb" ]]; then
-        local complete_pkgname="${pkgname}_$version${iteration:+-$iteration}_$deb_architecture.deb"
-    else
-        # rpm packages get iteration 1 if we don't supply one
-        iteration=${iteration:-1}
-        local complete_pkgname="$pkgname-$version-${iteration}.$rpm_architecture.rpm"
-    fi
+    local full_pkgname
+    get_complete_package_name full_pkgname $pkgname $version $pkgtype $iteration $arch
 
     # See if we can skip building the package, only if it already exists in the
     # processed/ directory. If so, move it back to the packages directory to make
@@ -274,32 +313,32 @@ test_package_presence() {
       fi
 
       repo_pkg_list=$(curl -s -o - http://apt.arvados.org/pool/${D}/main/${repo_subdir}/)
-      echo ${repo_pkg_list} |grep -q ${complete_pkgname}
+      echo ${repo_pkg_list} |grep -q ${full_pkgname}
       if [ $? -eq 0 ] ; then
-        echo "Package $complete_pkgname exists, not rebuilding!"
-        curl -s -o ./${complete_pkgname} http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${complete_pkgname}
+        echo "Package $full_pkgname exists upstream, not rebuilding, downloading instead!"
+        curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${full_pkgname}
         return 1
-      elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
-        echo "Package $complete_pkgname exists, not rebuilding!"
+      elif test -f "$WORKSPACE/packages/$TARGET/processed/${full_pkgname}" ; then
+        echo "Package $full_pkgname exists, not rebuilding!"
         return 1
       else
-        echo "Package $complete_pkgname not found, building"
+        echo "Package $full_pkgname not found, building"
         return 0
       fi
     else
       centos_repo="http://rpm.arvados.org/CentOS/7/dev/x86_64/"
 
       repo_pkg_list=$(curl -s -o - ${centos_repo})
-      echo ${repo_pkg_list} |grep -q ${complete_pkgname}
+      echo ${repo_pkg_list} |grep -q ${full_pkgname}
       if [ $? -eq 0 ]; then
-        echo "Package $complete_pkgname exists, not rebuilding!"
-        curl -s -o ./${complete_pkgname} ${centos_repo}${complete_pkgname}
+        echo "Package $full_pkgname exists upstream, not rebuilding, downloading instead!"
+        curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" ${centos_repo}${full_pkgname}
         return 1
-      elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
-        echo "Package $complete_pkgname exists, not rebuilding!"
+      elif test -f "$WORKSPACE/packages/$TARGET/processed/${full_pkgname}" ; then
+        echo "Package $full_pkgname exists, not rebuilding!"
         return 1
       else
-        echo "Package $complete_pkgname not found, building"
+        echo "Package $full_pkgname not found, building"
         return 0
       fi
     fi
@@ -681,7 +720,11 @@ fpm_build () {
   shift
 
   if [[ -n "$ONLY_BUILD" ]] && [[ "$PACKAGE_NAME" != "$ONLY_BUILD" ]] && [[ "$PACKAGE" != "$ONLY_BUILD" ]] ; then
+    # arvados-workbench depends on arvados-server at build time, so even when
+    # only arvados-workbench is being built, we need to build arvados-server too
+    if [[ "$PACKAGE_NAME" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
       return 0
+    fi
   fi
 
   local default_iteration_value="$(default_iteration "$PACKAGE" "$VERSION" "$PACKAGE_TYPE")"
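
The arvados-server exception now appears in package_go_binary, test_package_presence, and fpm_build alike, so a single-package build of the workbench still produces the server package it needs at build time. Assuming ONLY_BUILD is set via the script's existing --only-build option, the effect would be roughly (hypothetical invocation, shown for illustration):

    # builds arvados-workbench plus its build-time dependency arvados-server;
    # every other package is skipped
    ./run-build-packages.sh --target debian9 --only-build arvados-workbench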
index 589e05c8a14c92a84de4ba2ae2fe586d72bc3a3d..308af5a01a20e0e4bf93e9cbe261d1aede87b51f 100644 (file)
@@ -18,15 +18,13 @@ var (
                "-version":  cmd.Version,
                "--version": cmd.Version,
 
-               "copy":     cli.Copy,
-               "create":   cli.Create,
-               "edit":     cli.Edit,
-               "get":      cli.Get,
-               "keep":     cli.Keep,
-               "pipeline": cli.Pipeline,
-               "run":      cli.Run,
-               "tag":      cli.Tag,
-               "ws":       cli.Ws,
+               "copy":   cli.Copy,
+               "create": cli.Create,
+               "edit":   cli.Edit,
+               "get":    cli.Get,
+               "keep":   cli.Keep,
+               "tag":    cli.Tag,
+               "ws":     cli.Ws,
 
                "api_client_authorization": cli.APICall,
                "api_client":               cli.APICall,
diff --git a/crunch_scripts/GATK2-VariantFiltration b/crunch_scripts/GATK2-VariantFiltration
deleted file mode 100755 (executable)
index 0ef4a74..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-gatk_path = arvados.util.tarball_extract(
-    tarball = this_job['script_parameters']['gatk_binary_tarball'],
-    path = 'gatk')
-bundle_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    path = 'gatk-bundle',
-    files = ['human_g1k_v37.dict', 'human_g1k_v37.fasta', 'human_g1k_v37.fasta.fai'])
-this_task_input = this_task['parameters']['input']
-
-input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
-
-# choose vcf temporary file names
-vcf_in = os.path.join(arvados.current_task().tmpdir,
-                      os.path.basename(input_file.name()))
-vcf_out = re.sub('(.*)\\.vcf', '\\1-filtered.vcf', vcf_in)
-
-# fetch the unfiltered data
-vcf_in_file = open(vcf_in, 'w')
-for buf in input_file.readall():
-    vcf_in_file.write(buf)
-vcf_in_file.close()
-
-stdoutdata, stderrdata = arvados.util.run_command(
-    ['java', '-Xmx1g',
-     '-jar', os.path.join(gatk_path,'GenomeAnalysisTK.jar'),
-     '-T', 'VariantFiltration', '--variant', vcf_in,
-     '--out', vcf_out,
-     '--filterExpression', 'QD < 2.0',
-     '--filterName', 'GATK_QD',
-     '--filterExpression', 'MQ < 40.0',
-     '--filterName', 'GATK_MQ',
-     '--filterExpression', 'FS > 60.0',
-     '--filterName', 'GATK_FS',
-     '--filterExpression', 'MQRankSum < -12.5',
-     '--filterName', 'GATK_MQRankSum',
-     '--filterExpression', 'ReadPosRankSum < -8.0',
-     '--filterName', 'GATK_ReadPosRankSum',
-     '-R', os.path.join(bundle_path, 'human_g1k_v37.fasta')],
-    cwd=arvados.current_task().tmpdir)
-
-# store the filtered data
-with open(vcf_out, 'rb') as f:
-    out = arvados.CollectionWriter()
-    while True:
-        buf = f.read()
-        if len(buf) == 0:
-            break
-        out.write(buf)
-out.set_current_file_name(os.path.basename(vcf_out))
-
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/GATK2-bqsr b/crunch_scripts/GATK2-bqsr
deleted file mode 100755 (executable)
index ab78226..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import arvados
-import arvados_gatk2
-import arvados_samtools
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-arvados_samtools.one_task_per_bam_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-known_sites_files = arvados.getjobparam(
-    'known_sites',
-    ['dbsnp_137.b37.vcf',
-     'Mills_and_1000G_gold_standard.indels.b37.vcf',
-     ])
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai'
-        ] + known_sites_files + [v + '.idx' for v in known_sites_files],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-
-input_collection = this_task['parameters']['input']
-input_dir = arvados.util.collection_extract(
-    collection = input_collection,
-    path = os.path.join(this_task.tmpdir, 'input'))
-input_bam_files = []
-for f in arvados.util.listdir_recursive(input_dir):
-    if re.search(r'\.bam$', f):
-        input_stream_name, input_file_name = os.path.split(f)
-        input_bam_files += [os.path.join(input_dir, f)]
-if len(input_bam_files) != 1:
-    raise InvalidArgumentError("Expected exactly one bam file per task.")
-
-known_sites_args = []
-for f in known_sites_files:
-    known_sites_args += ['-knownSites', os.path.join(bundle_dir, f)]
-
-recal_file = os.path.join(tmpdir, 'recal.csv')
-
-children = {}
-pipes = {}
-
-arvados_gatk2.run(
-    args=[
-        '-nct', arvados_gatk2.cpus_on_this_node(),
-        '-T', 'BaseRecalibrator',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', recal_file,
-        ] + known_sites_args)
-
-pipe_setup(pipes, 'BQSR')
-if 0 == named_fork(children, 'BQSR'):
-    pipe_closeallbut(pipes, ('BQSR', 'w'))
-    arvados_gatk2.run(
-        args=[
-        '-T', 'PrintReads',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', '/dev/fd/' + str(pipes['BQSR','w']),
-        '-BQSR', recal_file,
-        '--disable_bam_indexing',
-        ],
-        close_fds=False)
-    os._exit(0)
-os.close(pipes.pop(('BQSR','w'), None))
-
-out = arvados.CollectionWriter()
-out.start_new_stream(input_stream_name)
-
-out.start_new_file(input_file_name + '.recal.csv')
-out.write(open(recal_file, 'rb'))
-
-out.start_new_file(input_file_name)
-while True:
-    buf = os.read(pipes['BQSR','r'], 2**20)
-    if len(buf) == 0:
-        break
-    out.write(buf)
-pipe_closeallbut(pipes)
-
-if waitpid_and_check_children(children):
-    this_task.set_output(out.finish())
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/GATK2-merge-call b/crunch_scripts/GATK2-merge-call
deleted file mode 100755 (executable)
index 6d17517..0000000
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import string
-import threading
-import arvados
-import arvados_gatk2
-import arvados_picard
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai',
-        'dbsnp_137.b37.vcf',
-        'dbsnp_137.b37.vcf.idx',
-        ],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-regions_args = []
-if 'regions' in this_job['script_parameters']:
-    regions_dir = arvados.util.collection_extract(
-        collection = this_job['script_parameters']['regions'],
-        path = 'regions')
-    region_padding = int(this_job['script_parameters']['region_padding'])
-    for f in os.listdir(regions_dir):
-        if re.search(r'\.bed$', f):
-            regions_args += [
-                '--intervals', os.path.join(regions_dir, f),
-                '--interval_padding', str(region_padding)
-                ]
-
-
-# Start a child process for each input file, feeding data to picard.
-
-input_child_names = []
-children = {}
-pipes = {}
-
-input_collection = this_job['script_parameters']['input']
-input_index = 0
-for s in arvados.CollectionReader(input_collection).all_streams():
-    for f in s.all_files():
-        if not re.search(r'\.bam$', f.name()):
-            continue
-        input_index += 1
-        childname = 'input-' + str(input_index)
-        input_child_names += [childname]
-        pipe_setup(pipes, childname)
-        childpid = named_fork(children, childname)
-        if childpid == 0:
-            pipe_closeallbut(pipes, (childname, 'w'))
-            for s in f.readall():
-                os.write(pipes[childname, 'w'], s)
-            os.close(pipes[childname, 'w'])
-            os._exit(0)
-        sys.stderr.write("pid %d writing %s to fd %d->%d\n" %
-                         (childpid,
-                          s.name()+'/'+f.name(),
-                          pipes[childname, 'w'],
-                          pipes[childname, 'r']))
-        pipe_closeallbut(pipes, *[(childname, 'r')
-                                  for childname in input_child_names])
-
-
-# Merge-sort the input files to merge.bam
-
-arvados_picard.run(
-    'MergeSamFiles',
-    args=[
-        'I=/dev/fd/' + str(pipes[childname, 'r'])
-        for childname in input_child_names
-        ],
-    params={
-        'o': 'merge.bam',
-        'quiet': 'true',
-        'so': 'coordinate',
-        'use_threading': 'true',
-        'create_index': 'true',
-        'validation_stringency': 'LENIENT',
-        },
-    close_fds=False,
-    )
-pipe_closeallbut(pipes)
-
-
-# Run CoverageBySample on merge.bam
-
-pipe_setup(pipes, 'stats_log')
-pipe_setup(pipes, 'stats_out')
-if 0 == named_fork(children, 'GATK'):
-    pipe_closeallbut(pipes,
-                     ('stats_log', 'w'),
-                     ('stats_out', 'w'))
-    arvados_gatk2.run(
-        args=[
-            '-T', 'CoverageBySample',
-            '-R', ref_fasta_files[0],
-            '-I', 'merge.bam',
-            '-o', '/dev/fd/' + str(pipes['stats_out', 'w']),
-            '--log_to_file', '/dev/fd/' + str(pipes['stats_log', 'w']),
-            ]
-        + regions_args,
-        close_fds=False)
-    pipe_closeallbut(pipes)
-    os._exit(0)
-pipe_closeallbut(pipes, ('stats_log', 'r'), ('stats_out', 'r'))
-
-
-# Start two threads to read from CoverageBySample pipes
-
-class ExceptionPropagatingThread(threading.Thread):
-    """
-    If a subclassed thread calls _raise(e) in run(), running join() on
-    the thread will raise e in the thread that calls join().
-    """
-    def __init__(self, *args, **kwargs):
-        super(ExceptionPropagatingThread, self).__init__(*args, **kwargs)
-        self.__exception = None
-    def join(self, *args, **kwargs):
-        ret = super(ExceptionPropagatingThread, self).join(*args, **kwargs)
-        if self.__exception:
-            raise self.__exception
-        return ret
-    def _raise(self, exception):
-        self.__exception = exception
-
-class StatsLogReader(ExceptionPropagatingThread):
-    def __init__(self, **kwargs):
-        super(StatsLogReader, self).__init__()
-        self.args = kwargs
-    def run(self):
-        try:
-            for logline in self.args['infile']:
-                x = re.search('Processing (\d+) bp from intervals', logline)
-                if x:
-                    self._total_bp = int(x.group(1))
-        except Exception as e:
-            self._raise(e)
-    def total_bp(self):
-        self.join()
-        return self._total_bp
-stats_log_thr = StatsLogReader(infile=os.fdopen(pipes.pop(('stats_log', 'r'))))
-stats_log_thr.start()
-
-class StatsOutReader(ExceptionPropagatingThread):
-    """
-    Read output of CoverageBySample and collect a histogram of
-    coverage (last column) -> number of loci (number of rows).
-    """
-    def __init__(self, **kwargs):
-        super(StatsOutReader, self).__init__()
-        self.args = kwargs
-    def run(self):
-        try:
-            hist = [0]
-            histtot = 0
-            for line in self.args['infile']:
-                try:
-                    i = int(string.split(line)[-1])
-                except ValueError:
-                    continue
-                if i >= 1:
-                    if len(hist) <= i:
-                        hist.extend([0 for x in range(1+i-len(hist))])
-                    hist[i] += 1
-                    histtot += 1
-            hist[0] = stats_log_thr.total_bp() - histtot
-            self._histogram = hist
-        except Exception as e:
-            self._raise(e)
-    def histogram(self):
-        self.join()
-        return self._histogram
-stats_out_thr = StatsOutReader(infile=os.fdopen(pipes.pop(('stats_out', 'r'))))
-stats_out_thr.start()
-
-
-# Run UnifiedGenotyper on merge.bam
-
-arvados_gatk2.run(
-    args=[
-        '-nt', arvados_gatk2.cpus_on_this_node(),
-        '-T', 'UnifiedGenotyper',
-        '-R', ref_fasta_files[0],
-        '-I', 'merge.bam',
-        '-o', os.path.join(tmpdir, 'out.vcf'),
-        '--dbsnp', os.path.join(bundle_dir, 'dbsnp_137.b37.vcf'),
-        '-metrics', 'UniGenMetrics',
-        '-A', 'DepthOfCoverage',
-        '-A', 'AlleleBalance',
-        '-A', 'QualByDepth',
-        '-A', 'HaplotypeScore',
-        '-A', 'MappingQualityRankSumTest',
-        '-A', 'ReadPosRankSumTest',
-        '-A', 'FisherStrand',
-        '-glm', 'both',
-        ]
-    + regions_args
-    + arvados.getjobparam('GATK2_UnifiedGenotyper_args',[]))
-
-# Copy the output VCF file to Keep
-
-out = arvados.CollectionWriter()
-out.start_new_stream()
-out.start_new_file('out.vcf')
-out.write(open(os.path.join(tmpdir, 'out.vcf'), 'rb'))
-
-
-# Write statistics to Keep
-
-out.start_new_file('mincoverage_nlocus.csv')
-sofar = 0
-hist = stats_out_thr.histogram()
-total_bp = stats_log_thr.total_bp()
-for i in range(len(hist)):
-    out.write("%d,%d,%f\n" %
-              (i,
-               total_bp - sofar,
-               100.0 * (total_bp - sofar) / total_bp))
-    sofar += hist[i]
-
-if waitpid_and_check_children(children):
-    this_task.set_output(out.finish())
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/GATK2-realign b/crunch_scripts/GATK2-realign
deleted file mode 100755 (executable)
index 2787dff..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import arvados
-import arvados_gatk2
-import arvados_picard
-import arvados_samtools
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-arvados_samtools.one_task_per_bam_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-known_sites_files = arvados.getjobparam(
-    'known_sites',
-    ['dbsnp_137.b37.vcf',
-     'Mills_and_1000G_gold_standard.indels.b37.vcf',
-     ])
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai'
-        ] + known_sites_files + [v + '.idx' for v in known_sites_files],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-regions_args = []
-if 'regions' in this_job['script_parameters']:
-    regions_dir = arvados.util.collection_extract(
-        collection = this_job['script_parameters']['regions'],
-        path = 'regions')
-    region_padding = int(this_job['script_parameters']['region_padding'])
-    for f in os.listdir(regions_dir):
-        if re.search(r'\.bed$', f):
-            regions_args += [
-                '--intervals', os.path.join(regions_dir, f),
-                '--interval_padding', str(region_padding)
-                ]
-
-input_collection = this_task['parameters']['input']
-input_dir = arvados.util.collection_extract(
-    collection = input_collection,
-    path = os.path.join(this_task.tmpdir, 'input'))
-input_bam_files = []
-for f in arvados.util.listdir_recursive(input_dir):
-    if re.search(r'\.bam$', f):
-        input_stream_name, input_file_name = os.path.split(f)
-        input_bam_files += [os.path.join(input_dir, f)]
-if len(input_bam_files) != 1:
-    raise InvalidArgumentError("Expected exactly one bam file per task.")
-
-known_sites_args = []
-for f in known_sites_files:
-    known_sites_args += ['-known', os.path.join(bundle_dir, f)]
-
-children = {}
-pipes = {}
-
-arvados_gatk2.run(
-    args=[
-        '-nt', arvados_gatk2.cpus_per_task(),
-        '-T', 'RealignerTargetCreator',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', os.path.join(tmpdir, 'intervals.list')
-        ] + known_sites_args + regions_args)
-
-pipe_setup(pipes, 'IndelRealigner')
-if 0 == named_fork(children, 'IndelRealigner'):
-    pipe_closeallbut(pipes, ('IndelRealigner', 'w'))
-    arvados_gatk2.run(
-        args=[
-        '-T', 'IndelRealigner',
-        '-R', ref_fasta_files[0],
-        '-targetIntervals', os.path.join(tmpdir, 'intervals.list'),
-        '-I', input_bam_files[0],
-        '-o', '/dev/fd/' + str(pipes['IndelRealigner','w']),
-        '--disable_bam_indexing',
-        ] + known_sites_args + regions_args,
-        close_fds=False)
-    os._exit(0)
-os.close(pipes.pop(('IndelRealigner','w'), None))
-
-pipe_setup(pipes, 'bammanifest')
-pipe_setup(pipes, 'bam')
-if 0==named_fork(children, 'bammanifest'):
-    pipe_closeallbut(pipes,
-                     ('IndelRealigner', 'r'),
-                     ('bammanifest', 'w'),
-                     ('bam', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(input_file_name)
-    while True:
-        buf = os.read(pipes['IndelRealigner','r'], 2**20)
-        if len(buf) == 0:
-            break
-        os.write(pipes['bam','w'], buf)
-        out.write(buf)
-    os.write(pipes['bammanifest','w'], out.manifest_text())
-    os.close(pipes['bammanifest','w'])
-    os._exit(0)
-
-pipe_setup(pipes, 'index')
-if 0==named_fork(children, 'index'):
-    pipe_closeallbut(pipes, ('bam', 'r'), ('index', 'w'))
-    arvados_picard.run(
-        'BuildBamIndex',
-        params={
-            'i': '/dev/fd/' + str(pipes['bam','r']),
-            'o': '/dev/fd/' + str(pipes['index','w']),
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT'
-            },
-        close_fds=False)
-    os._exit(0)
-
-pipe_setup(pipes, 'indexmanifest')
-if 0==named_fork(children, 'indexmanifest'):
-    pipe_closeallbut(pipes, ('index', 'r'), ('indexmanifest', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(re.sub('\.bam$', '.bai', input_file_name))
-    while True:
-        buf = os.read(pipes['index','r'], 2**20)
-        if len(buf) == 0:
-            break
-        out.write(buf)
-    os.write(pipes['indexmanifest','w'], out.manifest_text())
-    os.close(pipes['indexmanifest','w'])
-    os._exit(0)
-
-pipe_closeallbut(pipes, ('bammanifest', 'r'), ('indexmanifest', 'r'))
-outmanifest = ''
-for which in ['bammanifest', 'indexmanifest']:
-    with os.fdopen(pipes[which,'r'], 'rb', 2**20) as f:
-        while True:
-            buf = f.read()
-            if buf == '':
-                break
-            outmanifest += buf
-
-all_ok = True
-for (childname, pid) in children.items():
-    all_ok = all_ok and waitpid_and_check_exit(pid, childname)
-
-if all_ok:
-    this_task.set_output(outmanifest)
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py
deleted file mode 100755 (executable)
index b7e19ec..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import subprocess
-import crunchutil.subst as subst
-import shutil
-import os
-import sys
-import time
-
-if len(arvados.current_task()['parameters']) > 0:
-    p = arvados.current_task()['parameters']
-else:
-    p = arvados.current_job()['script_parameters']
-
-t = arvados.current_task().tmpdir
-
-os.unlink("/usr/local/share/bcbio-nextgen/galaxy")
-os.mkdir("/usr/local/share/bcbio-nextgen/galaxy")
-shutil.copy("/usr/local/share/bcbio-nextgen/config/bcbio_system.yaml", "/usr/local/share/bcbio-nextgen/galaxy")
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") as f:
-    f.write('''<tables>
-    <!-- Locations of indexes in the BWA mapper format -->
-    <table name="bwa_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bwa_index.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie2 mapper format -->
-    <table name="bowtie2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie2 mapper format for TopHat2 to use -->
-    <table name="tophat2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Location of SAMTools indexes and other files -->
-    <table name="sam_fa_indexes" comment_char="#">
-        <columns>index, value, path</columns>
-        <file path="tool-data/sam_fa_indices.loc" />
-    </table>
-    <!-- Location of Picard dict file and other files -->
-    <table name="picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-    <!-- Location of Picard dict files valid for GATK -->
-    <table name="gatk_picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/gatk_sorted_picard_index.loc" />
-    </table>
-</tables>
-''')
-
-os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data")
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n"))
-
-with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f:
-    f.write('''
-# Template for whole genome Illumina variant calling with FreeBayes
-# This is a GATK-free pipeline without post-alignment BAM pre-processing
-# (recalibration and realignment)
----
-details:
-  - analysis: variant2
-    genome_build: GRCh37
-    # to do multi-sample variant calling, assign samples the same metadata / batch
-    # metadata:
-    #   batch: your-arbitrary-batch-name
-    algorithm:
-      aligner: bwa
-      mark_duplicates: true
-      recalibrate: false
-      realign: false
-      variantcaller: freebayes
-      platform: illumina
-      quality_format: Standard
-      # for targetted projects, set the region
-      # variant_regions: /path/to/your.bed
-''')
-
-os.unlink("/usr/local/share/bcbio-nextgen/gemini_data")
-os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data")
-
-os.chdir(arvados.current_task().tmpdir)
-
-rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1",
-                         subst.do_substitution(p, "$(file $(R1))"),
-                         subst.do_substitution(p, "$(file $(R2))")])
-
-os.chdir("project1/work")
-
-os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data")
-
-rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']])
-
-print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed"))
-
-if rcode == 0:
-    os.chdir("../final")
-
-    print("arvados-bcbio-nextgen: the follow output files will be saved to keep:")
-
-    subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"])
-
-    print("arvados-bcbio-nextgen: start writing output to keep")
-
-    done = False
-    api = arvados.api('v1')
-    while not done:
-        try:
-            out = arvados.CollectionWriter()
-            out.write_directory_tree(".", max_manifest_depth=0)
-            outuuid = out.finish()
-            api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                                 body={
-                                                     'output':outuuid,
-                                                     'success': (rcode == 0),
-                                                     'progress':1.0
-                                                 }).execute()
-            done = True
-        except Exception as e:
-            print("arvados-bcbio-nextgen: caught exception: {}".format(e))
-            time.sleep(5)
-
-sys.exit(rcode)
diff --git a/crunch_scripts/arvados_bwa.py b/crunch_scripts/arvados_bwa.py
deleted file mode 100644 (file)
index aefc1f0..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-bwa_install_path = None
-
-def install_path():
-    """
-    Extract the bwa source tree, build the bwa binary, and return the
-    path to the source tree.
-    """
-    global bwa_install_path
-    if bwa_install_path:
-        return bwa_install_path
-
-    bwa_install_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['bwa_tbz'],
-        path = 'bwa')
-
-    # build "bwa" binary
-    lockfile = open(os.path.split(bwa_install_path)[0] + '.bwa-make.lock',
-                    'w')
-    fcntl.flock(lockfile, fcntl.LOCK_EX)
-    arvados.util.run_command(['make', '-j16'], cwd=bwa_install_path)
-    lockfile.close()
-
-    return bwa_install_path
-
-def bwa_binary():
-    """
-    Return the path to the bwa executable.
-    """
-    return os.path.join(install_path(), 'bwa')
-
-def run(command, command_args, **kwargs):
-    """
-    Build and run the bwa binary.
-
-    command is the bwa module, e.g., "index" or "aln".
-
-    command_args is a list of additional command line arguments, e.g.,
-    ['-a', 'bwtsw', 'ref.fasta']
-
-    It is assumed that we are running in a Crunch job environment, and
-    the job's "bwa_tbz" parameter is a collection containing the bwa
-    source tree in a .tbz file.
-    """
-    execargs = [bwa_binary(),
-                command]
-    execargs += command_args
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stderr=sys.stderr,
-        stdin=kwargs.get('stdin', subprocess.PIPE),
-        stdout=kwargs.get('stdout', sys.stderr))
-
-def one_task_per_pair_input_file(if_sequence=0, and_end_task=True):
-    """
-    Queue one task for each pair of fastq files in this job's input
-    collection.
-
-    Each new task will have two parameters, named "input_1" and
-    "input_2", each being a manifest containing a single fastq file.
-
-    A matching pair of files in the input collection is assumed to
-    have names "x_1.y" and "x_2.y".
-
-    Files in the input collection that are not part of a matched pair
-    are silently ignored.
-
-    if_sequence and and_end_task arguments have the same significance
-    as in arvados.job_setup.one_task_per_input_file().
-    """
-    if if_sequence != arvados.current_task()['sequence']:
-        return
-    job_input = arvados.current_job()['script_parameters']['input']
-    cr = arvados.CollectionReader(job_input)
-    all_files = []
-    for s in cr.all_streams():
-        all_files += list(s.all_files())
-    for s in cr.all_streams():
-        for left_file in s.all_files():
-            left_name = left_file.name()
-            right_file = None
-            right_name = re.sub(r'(.*_)1\.', '\g<1>2.', left_name)
-            if right_name == left_name:
-                continue
-            for f2 in s.all_files():
-                if right_name == f2.name():
-                    right_file = f2
-            if right_file != None:
-                new_task_attrs = {
-                    'job_uuid': arvados.current_job()['uuid'],
-                    'created_by_job_task_uuid': arvados.current_task()['uuid'],
-                    'sequence': if_sequence + 1,
-                    'parameters': {
-                        'input_1':left_file.as_manifest(),
-                        'input_2':right_file.as_manifest()
-                        }
-                    }
-                arvados.api().job_tasks().create(body=new_task_attrs).execute()
-    if and_end_task:
-        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                   body={'success':True}
-                                   ).execute()
-        exit(0)
diff --git a/crunch_scripts/arvados_gatk2.py b/crunch_scripts/arvados_gatk2.py
deleted file mode 100644 (file)
index fa00b44..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-gatk2_install_path = None
-
-def install_path():
-    global gatk2_install_path
-    if gatk2_install_path:
-        return gatk2_install_path
-    gatk2_install_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['gatk_tbz'],
-        path = 'gatk2')
-    return gatk2_install_path
-
-def memory_limit():
-    taskspernode = int(os.environ.get('CRUNCH_NODE_SLOTS', '1'))
-    with open('/proc/meminfo', 'r') as f:
-        ram = int(re.search(r'MemTotal:\s*(\d+)', f.read()).group(1)) / 1024
-    if taskspernode > 1:
-        ram = ram / taskspernode
-    return max(ram-700, 500)
-
-def cpus_on_this_node():
-    with open('/proc/cpuinfo', 'r') as cpuinfo:
-        return max(int(os.environ.get('SLURM_CPUS_ON_NODE', 1)),
-                   len(re.findall(r'^processor\s*:\s*\d',
-                                  cpuinfo.read(),
-                                  re.MULTILINE)))
-
-def cpus_per_task():
-    return max(1, (cpus_on_this_node()
-                   / int(os.environ.get('CRUNCH_NODE_SLOTS', 1))))
-
-def run(**kwargs):
-    kwargs.setdefault('cwd', arvados.current_task().tmpdir)
-    kwargs.setdefault('stdout', sys.stderr)
-    execargs = ['java',
-                '-Xmx%dm' % memory_limit(),
-                '-Djava.io.tmpdir=' + arvados.current_task().tmpdir,
-                '-jar', os.path.join(install_path(), 'GenomeAnalysisTK.jar')]
-    execargs += [str(arg) for arg in kwargs.pop('args', [])]
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    return arvados.util.run_command(execargs, **kwargs)
-
diff --git a/crunch_scripts/arvados_ipc.py b/crunch_scripts/arvados_ipc.py
deleted file mode 100644 (file)
index 9787162..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import sys
-import subprocess
-
-def pipe_setup(pipes, name):
-    pipes[name,'r'], pipes[name,'w'] = os.pipe()
-
-def pipe_closeallbut(pipes, *keepus):
-    for n,m in pipes.keys():
-        if (n,m) not in keepus:
-            os.close(pipes.pop((n,m), None))
-
-def named_fork(children, name):
-    children[name] = os.fork()
-    return children[name]
-
-def waitpid_and_check_children(children):
-    """
-    Given a dict of childname->pid, wait for each child process to
-    finish, and report non-zero exit status on stderr. Return True if
-    all children exited 0.
-    """
-    all_ok = True
-    for (childname, pid) in children.items():
-        # all_ok must be on RHS here -- we need to call waitpid() on
-        # every child, even if all_ok is already False.
-        all_ok = waitpid_and_check_exit(pid, childname) and all_ok
-    return all_ok
-
-def waitpid_and_check_exit(pid, childname=''):
-    """
-    Wait for a child process to finish. If it exits non-zero, report
-    exit status on stderr (mentioning the given childname) and return
-    False. If it exits zero, return True.
-    """
-    _, childstatus = os.waitpid(pid, 0)
-    exitvalue = childstatus >> 8
-    signal = childstatus & 127
-    dumpedcore = childstatus & 128
-    if childstatus != 0:
-        sys.stderr.write("%s child %d failed: exit %d signal %d core %s\n"
-                         % (childname, pid, exitvalue, signal,
-                            ('y' if dumpedcore else 'n')))
-        return False
-    return True
-
diff --git a/crunch_scripts/arvados_picard.py b/crunch_scripts/arvados_picard.py
deleted file mode 100644 (file)
index 3d830db..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-picard_install_path = None
-
-def install_path():
-    global picard_install_path
-    if picard_install_path:
-        return picard_install_path
-    zipball = arvados.current_job()['script_parameters']['picard_zip']
-    extracted = arvados.util.zipball_extract(
-        zipball = zipball,
-        path = 'picard')
-    for f in os.listdir(extracted):
-        if (re.search(r'^picard-tools-[\d\.]+$', f) and
-            os.path.exists(os.path.join(extracted, f, '.'))):
-            picard_install_path = os.path.join(extracted, f)
-            break
-    if not picard_install_path:
-        raise Exception("picard-tools-{version} directory not found in %s" %
-                        zipball)
-    return picard_install_path
-
-def run(module, **kwargs):
-    kwargs.setdefault('cwd', arvados.current_task().tmpdir)
-    execargs = ['java',
-                '-Xmx1500m',
-                '-Djava.io.tmpdir=' + arvados.current_task().tmpdir,
-                '-jar', os.path.join(install_path(), module + '.jar')]
-    execargs += [str(arg) for arg in kwargs.pop('args', [])]
-    for key, value in kwargs.pop('params', {}).items():
-        execargs += [key.upper() + '=' + str(value)]
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    return arvados.util.run_command(execargs, **kwargs)
diff --git a/crunch_scripts/arvados_samtools.py b/crunch_scripts/arvados_samtools.py
deleted file mode 100644 (file)
index 09992f6..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-samtools_path = None
-
-def samtools_install_path():
-    """
-    Extract the samtools source tree, build the samtools binary, and
-    return the path to the source tree.
-    """
-    global samtools_path
-    if samtools_path:
-        return samtools_path
-    samtools_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['samtools_tgz'],
-        path = 'samtools')
-
-    # build "samtools" binary
-    lockfile = open(os.path.split(samtools_path)[0] + '.samtools-make.lock',
-                    'w')
-    fcntl.flock(lockfile, fcntl.LOCK_EX)
-    arvados.util.run_command(['make', '-j16'], cwd=samtools_path)
-    lockfile.close()
-
-    return samtools_path
-
-def samtools_binary():
-    """
-    Return the path to the samtools executable.
-    """
-    return os.path.join(samtools_install_path(), 'samtools')
-
-def run(command, command_args, **kwargs):
-    """
-    Build and run the samtools binary.
-
-    command is the samtools subcommand, e.g., "view" or "sort".
-
-    command_args is a list of additional command line arguments, e.g.,
-    ['-bt', 'ref_list.txt', '-o', 'aln.bam', 'aln.sam.gz']
-
-    It is assumed that we are running in a Crunch job environment, and
-    the job's "samtools_tgz" parameter is a collection containing the
-    samtools source tree in a .tgz file.
-    """
-    execargs = [samtools_binary(),
-                command]
-    execargs += command_args
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stdin=kwargs.get('stdin', subprocess.PIPE),
-        stderr=kwargs.get('stderr', sys.stderr),
-        stdout=kwargs.get('stdout', sys.stderr))
-
-def one_task_per_bam_file(if_sequence=0, and_end_task=True):
-    """
-    Queue one task for each bam file in this job's input collection.
-
-    Each new task will have an "input" parameter: a manifest
-    containing one .bam file and (if available) the corresponding .bai
-    index file.
-
-    Files in the input collection that are not named *.bam or *.bai
-    (as well as *.bai files that do not match any .bam file present)
-    are silently ignored.
-
-    if_sequence and and_end_task arguments have the same significance
-    as in arvados.job_setup.one_task_per_input_file().
-    """
-    if if_sequence != arvados.current_task()['sequence']:
-        return
-    job_input = arvados.current_job()['script_parameters']['input']
-    cr = arvados.CollectionReader(job_input)
-    bam = {}
-    bai = {}
-    for s in cr.all_streams():
-        for f in s.all_files():
-            if re.search(r'\.bam$', f.name()):
-                bam[s.name(), f.name()] = f
-            elif re.search(r'\.bai$', f.name()):
-                bai[s.name(), f.name()] = f
-    for ((s_name, f_name), bam_f) in bam.items():
-        bai_f = bai.get((s_name, re.sub(r'bam$', 'bai', f_name)), None)
-        task_input = bam_f.as_manifest()
-        if bai_f:
-            task_input += bai_f.as_manifest()
-        new_task_attrs = {
-            'job_uuid': arvados.current_job()['uuid'],
-            'created_by_job_task_uuid': arvados.current_task()['uuid'],
-            'sequence': if_sequence + 1,
-            'parameters': {
-                'input': task_input
-                }
-            }
-        arvados.api().job_tasks().create(body=new_task_attrs).execute()
-    if and_end_task:
-        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={'success':True}
-                                         ).execute()
-        exit(0)
diff --git a/crunch_scripts/bwa-aln b/crunch_scripts/bwa-aln
deleted file mode 100755 (executable)
index e3d85a7..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados_bwa
-import arvados_samtools
-import os
-import re
-import sys
-import subprocess
-
-arvados_bwa.one_task_per_pair_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference_index'],
-    path = 'reference',
-    decompress = False)
-
-ref_basename = None
-for f in os.listdir(ref_dir):
-    basename = re.sub(r'\.bwt$', '', f)
-    if basename != f:
-        ref_basename = os.path.join(ref_dir, basename)
-if ref_basename == None:
-    raise Exception("Could not find *.bwt in reference collection.")
-
-tmp_dir = arvados.current_task().tmpdir
-
-class Aligner:
-    def input_filename(self):
-        for s in arvados.CollectionReader(self.collection).all_streams():
-            for f in s.all_files():
-                return f.decompressed_name()
-    def generate_input(self):
-        for s in arvados.CollectionReader(self.collection).all_streams():
-            for f in s.all_files():
-                for s in f.readall_decompressed():
-                    yield s
-    def aln(self, input_param):
-        self.collection = this_task['parameters'][input_param]
-        reads_filename = os.path.join(tmp_dir, self.input_filename())
-        aln_filename = os.path.join(tmp_dir, self.input_filename() + '.sai')
-        reads_pipe_r, reads_pipe_w = os.pipe()
-        if os.fork() == 0:
-            os.close(reads_pipe_r)
-            reads_file = open(reads_filename, 'wb')
-            for s in self.generate_input():
-                if len(s) != os.write(reads_pipe_w, s):
-                    raise Exception("short write")
-                reads_file.write(s)
-            reads_file.close()
-            os.close(reads_pipe_w)
-            sys.exit(0)
-        os.close(reads_pipe_w)
-
-        aln_file = open(aln_filename, 'wb')
-        bwa_proc = subprocess.Popen(
-            [arvados_bwa.bwa_binary(),
-             'aln', '-t', '16',
-             ref_basename,
-             '-'],
-            stdin=os.fdopen(reads_pipe_r, 'rb', 2**20),
-            stdout=aln_file)
-        aln_file.close()
-        return reads_filename, aln_filename
-
-reads_1, alignments_1 = Aligner().aln('input_1')
-reads_2, alignments_2 = Aligner().aln('input_2')
-pid1, exit1 = os.wait()
-pid2, exit2 = os.wait()
-if exit1 != 0 or exit2 != 0:
-    raise Exception("bwa aln exited non-zero (0x%x, 0x%x)" % (exit1, exit2))
-
-# output alignments in sam format to pipe
-sam_pipe_r, sam_pipe_w = os.pipe()
-sam_pid = os.fork()
-if sam_pid != 0:
-    # parent
-    os.close(sam_pipe_w)
-else:
-    # child
-    os.close(sam_pipe_r)
-    arvados_bwa.run('sampe',
-                    [ref_basename,
-                     alignments_1, alignments_2,
-                     reads_1, reads_2],
-                    stdout=os.fdopen(sam_pipe_w, 'wb', 2**20))
-    sys.exit(0)
-
-# convert sam (sam_pipe_r) to bam (bam_pipe_w)
-bam_pipe_r, bam_pipe_w = os.pipe()
-bam_pid = os.fork()
-if bam_pid != 0:
-    # parent
-    os.close(bam_pipe_w)
-    os.close(sam_pipe_r)
-else:
-    # child
-    os.close(bam_pipe_r)
-    arvados_samtools.run('view',
-                         ['-S', '-b',
-                          '-'],
-                         stdin=os.fdopen(sam_pipe_r, 'rb', 2**20),
-                         stdout=os.fdopen(bam_pipe_w, 'wb', 2**20))
-    sys.exit(0)
-
-# copy bam (bam_pipe_r) to Keep
-out_bam_filename = os.path.split(reads_1)[-1] + '.bam'
-out = arvados.CollectionWriter()
-out.start_new_stream()
-out.start_new_file(out_bam_filename)
-out.write(os.fdopen(bam_pipe_r, 'rb', 2**20))
-
-# make sure everyone exited nicely
-pid3, exit3 = os.waitpid(sam_pid, 0)
-if exit3 != 0:
-    raise Exception("bwa sampe exited non-zero (0x%x)" % exit3)
-pid4, exit4 = os.waitpid(bam_pid, 0)
-if exit4 != 0:
-    raise Exception("samtools view exited non-zero (0x%x)" % exit4)
-
-# proclaim success
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/bwa-index b/crunch_scripts/bwa-index
deleted file mode 100755 (executable)
index f5b7030..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados_bwa
-import os
-import re
-import sys
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'reference',
-    decompress = False)
-
-ref_fasta_files = (os.path.join(ref_dir, f)
-                   for f in os.listdir(ref_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f))
-
-# build reference index
-arvados_bwa.run('index',
-                ['-a', 'bwtsw'] + list(ref_fasta_files))
-
-# move output files to new empty directory
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-os.mkdir(out_dir)
-for f in os.listdir(ref_dir):
-    if re.search(r'\.(amb|ann|bwt|pac|rbwt|rpac|rsa|sa)$', f):
-        sys.stderr.write("bwa output: %s (%d)\n" %
-                         (f, os.stat(os.path.join(ref_dir, f)).st_size))
-        os.rename(os.path.join(ref_dir, f),
-                  os.path.join(out_dir, f))
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/collection-merge b/crunch_scripts/collection-merge
deleted file mode 100755 (executable)
index f3aa5ce..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# collection-merge
-#
-# Merge two or more collections together.  Can also be used to extract specific
-# files from a collection to produce a new collection.
-#
-# input:
-# An array of collections or collection/file paths in script_parameter["input"]
-#
-# output:
-# A manifest with the collections merged.  Duplicate file names will
-# have their contents concatenated in the order that they appear in the input
-# array.
-
-import arvados
-import md5
-import crunchutil.subst as subst
-import subprocess
-import os
-import hashlib
-
-p = arvados.current_job()['script_parameters']
-
-merged = ""
-src = []
-for c in p["input"]:
-    c = subst.do_substitution(p, c)
-    i = c.find('/')
-    if i == -1:
-        src.append(c)
-        merged += arvados.CollectionReader(c).manifest_text()
-    else:
-        src.append(c[0:i])
-        cr = arvados.CollectionReader(c[0:i])
-        j = c.rfind('/')
-        stream = c[i+1:j]
-        if stream == "":
-            stream = "."
-        fn = c[(j+1):]
-        for s in cr.all_streams():
-            if s.name() == stream:
-                if fn in s.files():
-                    merged += s.files()[fn].as_manifest()
-
-arvados.current_task().set_output(merged)
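Editor's note: the loop above accepts either bare collection locators or "locator/stream/filename" paths. A minimal sketch of that path splitting, using a made-up input value (illustration only, not part of the original script):

    c = "ae1f0000000000000000000000000000+1234/reads/r1.fastq"
    i, j = c.find('/'), c.rfind('/')
    locator = c[0:i]            # "ae1f0000000000000000000000000000+1234"
    stream = c[i+1:j] or "."    # "reads"; a bare "locator/file" entry yields "."
    fn = c[j+1:]                # "r1.fastq"

Entries with no '/' at all take the first branch above and have their whole manifest appended.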
diff --git a/crunch_scripts/crunchrunner b/crunch_scripts/crunchrunner
deleted file mode 100755 (executable)
index 25d3ba5..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-if test -n "$JOB_PARAMETER_CRUNCHRUNNER" ; then
-    exec $TASK_KEEPMOUNT/$JOB_PARAMETER_CRUNCHRUNNER
-else
-    exec /usr/local/bin/crunchrunner
-fi
diff --git a/crunch_scripts/crunchutil/__init__.py b/crunch_scripts/crunchutil/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/crunch_scripts/crunchutil/robust_put.py b/crunch_scripts/crunchutil/robust_put.py
deleted file mode 100644 (file)
index 27b0bf3..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados.commands.put as put
-import os
-import logging
-import time
-
-def machine_progress(bytes_written, bytes_expected):
-    return "upload wrote {} total {}\n".format(
-        bytes_written, -1 if (bytes_expected is None) else bytes_expected)
-
-class Args(object):
-    def __init__(self, fn):
-        self.filename = None
-        self.paths = [fn]
-        self.max_manifest_depth = 0
-
-# Upload to Keep with error recovery.
-# Return a uuid or raise an exception if there are too many failures.
-def upload(source_dir, logger=None):
-    if logger is None:
-        logger = logging.getLogger("arvados")
-
-    source_dir = os.path.abspath(source_dir)
-    done = False
-    if 'TASK_WORK' in os.environ:
-        resume_cache = put.ResumeCache(os.path.join(arvados.current_task().tmpdir, "upload-output-checkpoint"))
-    else:
-        resume_cache = put.ResumeCache(put.ResumeCache.make_path(Args(source_dir)))
-    reporter = put.progress_writer(machine_progress)
-    bytes_expected = put.expected_bytes_for([source_dir])
-    backoff = 1
-    outuuid = None
-    while not done:
-        try:
-            out = put.ArvPutCollectionWriter.from_cache(resume_cache, reporter, bytes_expected)
-            out.do_queued_work()
-            out.write_directory_tree(source_dir, max_manifest_depth=0)
-            outuuid = out.finish()
-            done = True
-        except KeyboardInterrupt as e:
-            logger.critical("caught interrupt signal 2")
-            raise e
-        except Exception as e:
-            logger.exception("caught exception:")
-            backoff *= 2
-            if backoff > 256:
-                logger.critical("Too many upload failures, giving up")
-                raise e
-            else:
-                logger.warning("Sleeping for %s seconds before trying again" % backoff)
-                time.sleep(backoff)
-    return outuuid
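Editor's note: the retry logic in upload() above is a generic exponential-backoff pattern. A minimal, self-contained sketch of the same pattern with no Arvados dependencies (the function and parameter names here are illustrative, not from the original module):

    import time

    def retry_with_backoff(action):
        # Like upload() above: double the delay after each failure and give up
        # (re-raise) once the delay would exceed 256 seconds.
        backoff = 1
        while True:
            try:
                return action()
            except KeyboardInterrupt:
                raise
            except Exception:
                backoff *= 2
                if backoff > 256:
                    raise
                time.sleep(backoff)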
diff --git a/crunch_scripts/crunchutil/subst.py b/crunch_scripts/crunchutil/subst.py
deleted file mode 100644 (file)
index 53def97..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import glob
-import os
-import re
-import stat
-
-BACKSLASH_ESCAPE_RE = re.compile(r'\\(.)')
-
-class SubstitutionError(Exception):
-    pass
-
-def search(c):
-    DEFAULT = 0
-    DOLLAR = 1
-
-    i = 0
-    state = DEFAULT
-    start = None
-    depth = 0
-    while i < len(c):
-        if c[i] == '\\':
-            i += 1
-        elif state == DEFAULT:
-            if c[i] == '$':
-                state = DOLLAR
-                if depth == 0:
-                    start = i
-            elif c[i] == ')':
-                if depth == 1:
-                    return [start, i]
-                if depth > 0:
-                    depth -= 1
-        elif state == DOLLAR:
-            if c[i] == '(':
-                depth += 1
-            state = DEFAULT
-        i += 1
-    if depth != 0:
-        raise SubstitutionError("Substitution error, mismatched parentheses {}".format(c))
-    return None
-
-def sub_file(v):
-    path = os.path.join(os.environ['TASK_KEEPMOUNT'], v)
-    st = os.stat(path)
-    if st and stat.S_ISREG(st.st_mode):
-        return path
-    else:
-        raise SubstitutionError("$(file {}) is not accessible or is not a regular file".format(path))
-
-def sub_dir(v):
-    d = os.path.dirname(v)
-    if d == '':
-        d = v
-    path = os.path.join(os.environ['TASK_KEEPMOUNT'], d)
-    st = os.stat(path)
-    if st and stat.S_ISDIR(st.st_mode):
-        return path
-    else:
-        raise SubstitutionError("$(dir {}) is not accessible or is not a directory".format(path))
-
-def sub_basename(v):
-    return os.path.splitext(os.path.basename(v))[0]
-
-def sub_glob(v):
-    l = glob.glob(v)
-    if len(l) == 0:
-        raise SubstitutionError("$(glob {}) no match found".format(v))
-    else:
-        return l[0]
-
-default_subs = {"file ": sub_file,
-                "dir ": sub_dir,
-                "basename ": sub_basename,
-                "glob ": sub_glob}
-
-def do_substitution(p, c, subs=default_subs):
-    while True:
-        m = search(c)
-        if m is None:
-            return BACKSLASH_ESCAPE_RE.sub(r'\1', c)
-
-        v = do_substitution(p, c[m[0]+2 : m[1]])
-        var = True
-        for sub in subs:
-            if v.startswith(sub):
-                r = subs[sub](v[len(sub):])
-                var = False
-                break
-        if var:
-            if v in p:
-                r = p[v]
-            else:
-                raise SubstitutionError("Unknown variable or function '%s' while performing substitution on '%s'" % (v, c))
-            if r is None:
-                raise SubstitutionError("Substitution for '%s' is null while performing substitution on '%s'" % (v, c))
-            if not isinstance(r, basestring):
-                raise SubstitutionError("Substitution for '%s' must be a string while performing substitution on '%s'" % (v, c))
-
-        c = c[:m[0]] + r + c[m[1]+1:]
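Editor's note: a small example of the expansion implemented by do_substitution() above (the parameter name and value are made up for illustration). $(name) expands from the parameter dictionary, the "file ", "dir ", "basename " and "glob " prefixes dispatch to the sub_* helpers, and expressions nest inside out:

    params = {"reads": "wgs01_R1.fastq.gz"}
    do_substitution(params, "$(basename $(reads))_aligned")
    # -> "wgs01_R1.fastq_aligned"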
diff --git a/crunch_scripts/crunchutil/vwd.py b/crunch_scripts/crunchutil/vwd.py
deleted file mode 100644 (file)
index 3245da1..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import stat
-import arvados.commands.run
-import logging
-
-# Implements "Virtual Working Directory"
-# Provides a way of emulating a shared writable directory in Keep based
-# on a "check out, edit, check in, merge" model.
-# At the moment, this only permits adding new files; applications
-# cannot modify or delete existing files.
-
-# Create a symlink tree rooted at target_dir mirroring arv-mounted
-# source_collection.  target_dir must be empty, and will be created if it
-# doesn't exist.
-def checkout(source_collection, target_dir, keepmount=None):
-    # create symlinks
-    if keepmount is None:
-        keepmount = os.environ['TASK_KEEPMOUNT']
-
-    if not os.path.exists(target_dir):
-        os.makedirs(target_dir)
-
-    l = os.listdir(target_dir)
-    if len(l) > 0:
-        raise Exception("target_dir must be empty before checkout, contains %s" % l)
-
-    stem = os.path.join(keepmount, source_collection)
-    for root, dirs, files in os.walk(os.path.join(keepmount, source_collection), topdown=True):
-        rel = root[len(stem)+1:]
-        for d in dirs:
-            os.mkdir(os.path.join(target_dir, rel, d))
-        for f in files:
-            os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
-
-def checkin(target_dir):
-    """Write files in `target_dir` to Keep.
-
-    Regular files or symlinks to files outside the keep mount are written to
-    Keep as normal files (Keep does not support symlinks).
-
-    Symlinks to files in the keep mount will result in files in the new
-    collection which reference existing Keep blocks; no data copying is necessary.
-
-    Returns a new Collection object, with data flushed but the collection record
-    not saved to the API.
-
-    """
-
-    outputcollection = arvados.collection.Collection(num_retries=5)
-
-    if target_dir[-1:] != '/':
-        target_dir += '/'
-
-    collections = {}
-
-    logger = logging.getLogger("arvados")
-
-    last_error = None
-    for root, dirs, files in os.walk(target_dir):
-        for f in files:
-            try:
-                s = os.lstat(os.path.join(root, f))
-
-                writeIt = False
-
-                if stat.S_ISREG(s.st_mode):
-                    writeIt = True
-                elif stat.S_ISLNK(s.st_mode):
-                    # 1. check if it is a link into a collection
-                    real = os.path.split(os.path.realpath(os.path.join(root, f)))
-                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
-                    if pdh is not None:
-                        # 2. load collection
-                        if pdh not in collections:
-                            # 2.1 make sure it is flushed (see #5787 note 11)
-                            fd = os.open(real[0], os.O_RDONLY)
-                            os.fsync(fd)
-                            os.close(fd)
-
-                            # 2.2 get collection from API server
-                            collections[pdh] = arvados.collection.CollectionReader(pdh,
-                                                                                   api_client=outputcollection._my_api(),
-                                                                                   keep_client=outputcollection._my_keep(),
-                                                                                   num_retries=5)
-                        # 3. copy arvfile to new collection
-                        outputcollection.copy(branch, os.path.join(root[len(target_dir):], f), source_collection=collections[pdh])
-                    else:
-                        writeIt = True
-
-                if writeIt:
-                    reldir = root[len(target_dir):]
-                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
-                        with open(os.path.join(root, f), "rb") as reader:
-                            dat = reader.read(64*1024)
-                            while dat:
-                                writer.write(dat)
-                                dat = reader.read(64*1024)
-            except (IOError, OSError) as e:
-                logger.error(e)
-                last_error = e
-
-    return (outputcollection, last_error)
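Editor's note: a hypothetical usage sketch of the checkout/checkin helpers above, as they would run inside a crunch task. It assumes TASK_KEEPMOUNT is set and that src_pdh names an existing collection; both names are placeholders:

    import crunchutil.vwd as vwd

    work_dir = "/tmp/vwd-work"
    vwd.checkout(src_pdh, work_dir)        # symlink tree mirroring the source collection
    # ... the task writes new files under work_dir ...
    out_coll, err = vwd.checkin(work_dir)  # new files are uploaded; symlinked files are
                                           # referenced without copying data
    if err is None:
        print out_coll.manifest_text()     # Python 2, matching the module above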
diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
deleted file mode 100755 (executable)
index 0c79844..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Crunch script integration for running arvados-cwl-runner inside a crunch job.
-
-import arvados_cwl
-import sys
-
-try:
-    # Use the crunch script defined in the arvados_cwl package.  This helps
-    # prevent the crunch script from going out of sync with the rest of the
-    # arvados_cwl package.
-    import arvados_cwl.crunch_script
-    arvados_cwl.crunch_script.run()
-    sys.exit()
-except ImportError:
-    pass
-
-# When running against an older arvados-cwl-runner package without
-# arvados_cwl.crunch_script, fall back to the old code.
-
-
-# This gets the job record, transforms the script parameters into a valid CWL
-# input object, then executes the CWL runner to run the underlying workflow or
-# tool.  When the workflow completes, the output object is recorded in an
-# output collection for this runner job.
-
-import arvados
-import arvados.collection
-import arvados.util
-import cwltool.main
-import logging
-import os
-import json
-import argparse
-import re
-import functools
-
-from arvados.api import OrderedJsonModel
-from cwltool.process import shortname, adjustFileObjs, adjustDirObjs, getListing, normalizeFilesDirs
-from cwltool.load_tool import load_tool
-
-# Print package versions
-logging.info(cwltool.main.versionstring())
-
-api = arvados.api("v1")
-
-try:
-    job_order_object = arvados.current_job()['script_parameters']
-
-    pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')
-
-    def keeppath(v):
-        if pdh_path.match(v):
-            return "keep:%s" % v
-        else:
-            return v
-
-    def keeppathObj(v):
-        v["location"] = keeppath(v["location"])
-
-    job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])
-
-    for k,v in job_order_object.items():
-        if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
-            job_order_object[k] = {
-                "class": "File",
-                "location": "keep:%s" % v
-            }
-
-    adjustFileObjs(job_order_object, keeppathObj)
-    adjustDirObjs(job_order_object, keeppathObj)
-    normalizeFilesDirs(job_order_object)
-    adjustDirObjs(job_order_object, functools.partial(getListing, arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))
-
-    output_name = None
-    if "arv:output_name" in job_order_object:
-        output_name = job_order_object["arv:output_name"]
-        del job_order_object["arv:output_name"]
-
-    runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
-                                      output_name=output_name)
-
-    t = load_tool(job_order_object, runner.arv_make_tool)
-
-    args = argparse.Namespace()
-    args.project_uuid = arvados.current_job()["owner_uuid"]
-    args.enable_reuse = True
-    args.submit = False
-    args.debug = True
-    args.quiet = False
-    args.ignore_docker_for_reuse = False
-    args.basedir = os.getcwd()
-    args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
-    outputObj = runner.arv_executor(t, job_order_object, **vars(args))
-
-    if runner.final_output_collection:
-        outputCollection = runner.final_output_collection.portable_data_hash()
-    else:
-        outputCollection = None
-
-    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={
-                                             'output': outputCollection,
-                                             'success': True,
-                                             'progress':1.0
-                                         }).execute()
-except Exception as e:
-    logging.exception("Unhandled exception")
-    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={
-                                             'output': None,
-                                             'success': False,
-                                             'progress':1.0
-                                         }).execute()
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
deleted file mode 100755 (executable)
index 100ea12..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-#
-# decompress-all.py
-#
-# Decompress all compressed files in the collection using the "dtrx" tool and
-# produce a new collection with the contents.  Uncompressed files
-# are passed through.
-#
-# input:
-# A collection at script_parameters["input"]
-#
-# output:
-# A manifest of the uncompressed contents of the input collection.
-
-import arvados
-import re
-import subprocess
-import os
-import sys
-import crunchutil.robust_put as robust_put
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
-                                          input_as_path=True)
-
-task = arvados.current_task()
-
-input_file = task['parameters']['input']
-
-infile_parts = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)?(/[^/]+)$", input_file)
-
-outdir = os.path.join(task.tmpdir, "output")
-os.makedirs(outdir)
-os.chdir(outdir)
-
-if infile_parts is None:
-    print >>sys.stderr, "Failed to parse input filename '%s' as a Keep file\n" % input_file
-    sys.exit(1)
-
-cr = arvados.CollectionReader(infile_parts.group(1))
-streamname = infile_parts.group(3)[1:]
-filename = infile_parts.group(4)[1:]
-
-if streamname is not None:
-    subprocess.call(["mkdir", "-p", streamname])
-    os.chdir(streamname)
-else:
-    streamname = '.'
-
-m = re.match(r'.*\.(gz|Z|bz2|tgz|tbz|zip|rar|7z|cab|deb|rpm|cpio|gem)$', arvados.get_task_param_mount('input'), re.IGNORECASE)
-
-if m is not None:
-    rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
-    if rc == 0:
-        task.set_output(robust_put.upload(outdir))
-    else:
-        sys.exit(rc)
-else:
-    streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
-    filereader = streamreader.files()[filename]
-    task.set_output(streamname + filereader.as_manifest()[1:])
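Editor's note: for reference, a hypothetical illustration of what the infile_parts regular expression above extracts from a task input path (the path itself is made up):

    import re
    m = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)?(/[^/]+)$",
                 "d41d8cd98f00b204e9800998ecf8427e+1234/subdir/archive.tar.gz")
    # m.group(1) -> "d41d8cd98f00b204e9800998ecf8427e+1234"  (collection locator)
    # m.group(3) -> "/subdir"           (stream; the script strips the leading '/')
    # m.group(4) -> "/archive.tar.gz"   (file name; likewise)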
diff --git a/crunch_scripts/file-select b/crunch_scripts/file-select
deleted file mode 100755 (executable)
index c4af05c..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_job_input = this_job['script_parameters']['input']
-manifest_text = ""
-for f in arvados.CollectionReader(this_job_input).all_files():
-    if f.name() in this_job['script_parameters']['names']:
-        manifest_text += f.as_manifest()
-
-this_task.set_output(arvados.Keep.put(manifest_text))
diff --git a/crunch_scripts/grep b/crunch_scripts/grep
deleted file mode 100755 (executable)
index a84c0f6..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_task_input = this_task['parameters']['input']
-pattern = re.compile(this_job['script_parameters']['pattern'])
-
-input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
-out = arvados.CollectionWriter()
-out.set_current_file_name(input_file.decompressed_name())
-out.set_current_stream_name(input_file.stream_name())
-for line in input_file.readlines():
-    if pattern.search(line):
-        out.write(line)
-
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/hash b/crunch_scripts/hash
deleted file mode 100755 (executable)
index 56eec7a..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import hashlib
-import os
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-
-if 'algorithm' in this_job['script_parameters']:
-    alg = this_job['script_parameters']['algorithm']
-else:
-    alg = 'md5'
-digestor = hashlib.new(alg)
-
-input_file = arvados.get_task_param_mount('input')
-
-with open(input_file) as f:
-    while True:
-        buf = f.read(2**20)
-        if len(buf) == 0:
-            break
-        digestor.update(buf)
-
-hexdigest = digestor.hexdigest()
-
-file_name = '/'.join(this_task['parameters']['input'].split('/')[1:])
-
-out = arvados.CollectionWriter()
-out.set_current_file_name("md5sum.txt")
-out.write("%s %s\n" % (hexdigest, file_name))
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/pgp-survey-import b/crunch_scripts/pgp-survey-import
deleted file mode 100755 (executable)
index f12e84b..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import string
-import json
-import UserDict
-import sys
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_job_input = this_job['script_parameters']['input']
-
-out = arvados.CollectionWriter()
-out.set_current_file_name("arvados_objects.json")
-out.write("[\n")
-separator = ""
-
-traits = {}
-done_bytes = 0
-done_ratio = 0
-for input_file in arvados.CollectionReader(this_job_input).all_files():
-    for line_number, line in enumerate(input_file.readlines()):
-
-        done_bytes += len(line)
-        new_done_ratio = 1.0 * done_bytes / input_file.size()
-        if line_number == 2 or new_done_ratio - done_ratio > 0.05:
-            sys.stderr.write("progress: %d%% after %d lines\n" % (int(done_ratio * 100), line_number+1))
-            done_ratio = new_done_ratio
-
-        words = string.split(string.strip(line), "\t")
-        if line_number == 0:
-            headings = words
-            for t in arvados.api('v1').traits().list(
-                where={'name':words},
-                limit=1000
-                ).execute()['items']:
-                traits[t['name']] = t
-            for i, trait_name in enumerate(words[3:], start=3):
-                # find or create trait
-                if trait_name not in traits:
-                    traits_match = arvados.api('v1').traits().list(
-                        where={'name':trait_name}
-                        ).execute()['items']
-                    if len(traits_match) > 0:
-                        traits[trait_name] = traits_match[0]
-                    else:
-                        traits[trait_name] = arvados.api('v1').traits().create(
-                            trait={'name':trait_name}).execute()
-                out.write(separator)
-                out.write(json.dumps(traits[trait_name]))
-                separator = ",\n"
-        else:
-            huID_links_match = arvados.api('v1').links().list(
-                where={'link_class':'identifier','name':words[0]}
-                ).execute()['items']
-            if len(huID_links_match) > 0:
-                human_uuid = huID_links_match[0]['head_uuid']
-            else:
-                human = arvados.api('v1').humans().create(
-                    body={}
-                    ).execute()
-                huID_link = arvados.api('v1').links().create(
-                    body={
-                        'link_class':'identifier',
-                        'name':words[0],
-                        'head_kind':'arvados#human',
-                        'head_uuid':human['uuid']
-                        }
-                    ).execute()
-                human_uuid = human['uuid']
-            human_trait = {}
-            for t in arvados.api('v1').links().list(
-                limit=10000,
-                where={
-                    'tail_uuid':human_uuid,
-                    'tail_kind':'arvados#human',
-                    'head_kind':'arvados#trait',
-                    'link_class':'human_trait',
-                    'name':'pgp-survey-response'
-                    }
-                ).execute()['items']:
-                human_trait[t['head_uuid']] = t
-            for i, trait_value in enumerate(words[3:], start=3):
-                trait_uuid = traits[headings[i]]['uuid']
-                if trait_uuid in human_trait:
-                    trait_link = human_trait[trait_uuid]
-                    if trait_link['properties']['value'] != trait_value:
-                        # update database value to match survey response
-                        trait_link['properties']['value'] = trait_value
-                        arvados.api('v1').links().update(
-                            uuid=trait_link['uuid'],
-                            body={'properties':trait_link['properties']}
-                            ).execute()
-                    out.write(",\n")
-                    out.write(json.dumps(trait_link))
-                elif trait_value == '':
-                    # nothing in database, nothing in input
-                    pass
-                else:
-                    trait_link = {
-                        'tail_uuid':human_uuid,
-                        'tail_kind':'arvados#human',
-                        'head_uuid':traits[headings[i]]['uuid'],
-                        'head_kind':'arvados#trait',
-                        'link_class':'human_trait',
-                        'name':'pgp-survey-response',
-                        'properties': { 'value': trait_value }
-                        }
-                    arvados.api('v1').links().create(
-                        body=trait_link
-                        ).execute()
-                    out.write(",\n")
-                    out.write(json.dumps(trait_link))
-
-out.write("\n]\n")
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/pgp-survey-parse b/crunch_scripts/pgp-survey-parse
deleted file mode 100755 (executable)
index ee852f1..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-parser_path = arvados.util.git_checkout(
-    url = this_job['script_parameters']['parser_url'],
-    version = this_job['script_parameters']['parser_version'],
-    path = 'parser')
-
-stdoutdata, stderrdata = arvados.util.run_command(
-    ["python", "demo.py"],
-    cwd=parser_path)
-
-out = arvados.CollectionWriter()
-out.write(stdoutdata)
-out.set_current_file_name('participant_traits.tsv')
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/picard-gatk2-prep b/crunch_scripts/picard-gatk2-prep
deleted file mode 100755 (executable)
index 976060f..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import subprocess
-import arvados_picard
-from arvados_ipc import *
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = True)
-ref_fasta_files = [os.path.join(ref_dir, f)
-                   for f in os.listdir(ref_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-input_collection = this_task['parameters']['input']
-
-for s in arvados.CollectionReader(input_collection).all_streams():
-    for f in s.all_files():
-        input_stream_name = s.name()
-        input_file_name = f.name()
-        break
-
-# Unfortunately, picard FixMateInformation cannot read from a pipe. We
-# must copy the input to a temporary file before running picard.
-input_bam_path = os.path.join(this_task.tmpdir, input_file_name)
-with open(input_bam_path, 'wb') as bam:
-    for s in arvados.CollectionReader(input_collection).all_streams():
-        for f in s.all_files():
-            for s in f.readall():
-                bam.write(s)
-
-children = {}
-pipes = {}
-
-pipe_setup(pipes, 'fixmate')
-if 0==named_fork(children, 'fixmate'):
-    pipe_closeallbut(pipes, ('fixmate', 'w'))
-    arvados_picard.run(
-        'FixMateInformation',
-        params={
-            'i': input_bam_path,
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'so': 'coordinate',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdout=os.fdopen(pipes['fixmate','w'], 'wb', 2**20))
-    os._exit(0)
-os.close(pipes.pop(('fixmate','w'), None))
-
-pipe_setup(pipes, 'sortsam')
-if 0==named_fork(children, 'sortsam'):
-    pipe_closeallbut(pipes, ('fixmate', 'r'), ('sortsam', 'w'))
-    arvados_picard.run(
-        'SortSam',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'so': 'coordinate',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdin=os.fdopen(pipes['fixmate','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['sortsam','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'reordersam')
-if 0==named_fork(children, 'reordersam'):
-    pipe_closeallbut(pipes, ('sortsam', 'r'), ('reordersam', 'w'))
-    arvados_picard.run(
-        'ReorderSam',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'reference': ref_fasta_files[0],
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdin=os.fdopen(pipes['sortsam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['reordersam','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'addrg')
-if 0==named_fork(children, 'addrg'):
-    pipe_closeallbut(pipes, ('reordersam', 'r'), ('addrg', 'w'))
-    arvados_picard.run(
-        'AddOrReplaceReadGroups',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'rglb': this_job['script_parameters'].get('rglb', 0),
-            'rgpl': this_job['script_parameters'].get('rgpl', 'illumina'),
-            'rgpu': this_job['script_parameters'].get('rgpu', 0),
-            'rgsm': this_job['script_parameters'].get('rgsm', 0),
-            'validation_stringency': 'LENIENT'
-            },
-        stdin=os.fdopen(pipes['reordersam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['addrg','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'bammanifest')
-pipe_setup(pipes, 'bam')
-pipe_setup(pipes, 'casm_in')
-if 0==named_fork(children, 'bammanifest'):
-    pipe_closeallbut(pipes,
-                     ('addrg', 'r'),
-                     ('bammanifest', 'w'),
-                     ('bam', 'w'),
-                     ('casm_in', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(input_file_name)
-    while True:
-        buf = os.read(pipes['addrg','r'], 2**20)
-        if len(buf) == 0:
-            break
-        os.write(pipes['bam','w'], buf)
-        os.write(pipes['casm_in','w'], buf)
-        out.write(buf)
-    os.write(pipes['bammanifest','w'], out.manifest_text())
-    os.close(pipes['bammanifest','w'])
-    os._exit(0)
-
-pipe_setup(pipes, 'casm')
-if 0 == named_fork(children, 'casm'):
-    pipe_closeallbut(pipes, ('casm_in', 'r'), ('casm', 'w'))
-    arvados_picard.run(
-        'CollectAlignmentSummaryMetrics',
-        params={
-            'input': '/dev/fd/' + str(pipes['casm_in','r']),
-            'output': '/dev/fd/' + str(pipes['casm','w']),
-            'reference_sequence': ref_fasta_files[0],
-            'validation_stringency': 'LENIENT',
-            },
-        close_fds=False)
-    os._exit(0)
-
-pipe_setup(pipes, 'index')
-if 0==named_fork(children, 'index'):
-    pipe_closeallbut(pipes, ('bam', 'r'), ('index', 'w'))
-    arvados_picard.run(
-        'BuildBamIndex',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT'
-            },
-        stdin=os.fdopen(pipes['bam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['index','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'indexmanifest')
-if 0==named_fork(children, 'indexmanifest'):
-    pipe_closeallbut(pipes, ('index', 'r'), ('indexmanifest', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(re.sub('\.bam$', '.bai', input_file_name))
-    while True:
-        buf = os.read(pipes['index','r'], 2**20)
-        if len(buf) == 0:
-            break
-        out.write(buf)
-    os.write(pipes['indexmanifest','w'], out.manifest_text())
-    os.close(pipes['indexmanifest','w'])
-    os._exit(0)
-
-pipe_closeallbut(pipes,
-                 ('bammanifest', 'r'),
-                 ('indexmanifest', 'r'),
-                 ('casm', 'r'))
-
-outmanifest = ''
-
-for which in ['bammanifest', 'indexmanifest']:
-    with os.fdopen(pipes[which,'r'], 'rb', 2**20) as f:
-        while True:
-            buf = f.read()
-            if buf == '':
-                break
-            outmanifest += buf
-
-casm_out = arvados.CollectionWriter()
-casm_out.start_new_stream(input_stream_name)
-casm_out.start_new_file(input_file_name + '.casm.tsv')
-casm_out.write(os.fdopen(pipes.pop(('casm','r'))))
-
-outmanifest += casm_out.manifest_text()
-
-all_ok = True
-for (childname, pid) in children.items():
-    all_ok = all_ok and waitpid_and_check_exit(pid, childname)
-
-if all_ok:
-    this_task.set_output(outmanifest)
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/pyrtg.py b/crunch_scripts/pyrtg.py
deleted file mode 100644 (file)
index d733270..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-
-rtg_install_path = None
-
-def setup():
-    global rtg_install_path
-    if rtg_install_path:
-        return rtg_install_path
-    rtg_path = arvados.util.zipball_extract(
-        zipball = arvados.current_job()['script_parameters']['rtg_binary_zip'],
-        path = 'rtg')
-    rtg_license_path = arvados.util.collection_extract(
-        collection = arvados.current_job()['script_parameters']['rtg_license'],
-        path = 'license',
-        decompress = False)
-
-    # symlink to rtg-license.txt
-    license_txt_path = os.path.join(rtg_license_path, 'rtg-license.txt')
-    try:
-        os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
-    except OSError:
-        if not os.path.exists(os.path.join(rtg_path,'rtg-license.txt')):
-            os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
-
-    rtg_install_path = rtg_path
-    return rtg_path
-
-def run_rtg(command, output_dir, command_args, **kwargs):
-    global rtg_install_path
-    execargs = [os.path.join(rtg_install_path, 'rtg'),
-                command,
-                '-o', output_dir]
-    execargs += command_args
-    sys.stderr.write("run_rtg: exec %s\n" % str(execargs))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stderr=sys.stderr,
-        stdout=sys.stderr)
-
-    # Exit status cannot be trusted in rtg 1.1.1.
-    assert_done(output_dir)
-
-    # Copy log files to stderr and delete them to avoid storing them
-    # in Keep with the output data.
-    for dirent in arvados.util.listdir_recursive(output_dir):
-        if is_log_file(dirent):
-            log_file = os.path.join(output_dir, dirent)
-            sys.stderr.write(' '.join(['==>', dirent, '<==\n']))
-            with open(log_file, 'rb') as f:
-                while True:
-                    buf = f.read(2**20)
-                    if len(buf) == 0:
-                        break
-                    sys.stderr.write(buf)
-            sys.stderr.write('\n') # in case log does not end in newline
-            os.unlink(log_file)
-
-def assert_done(output_dir):
-    # Sanity-check exit code.
-    done_file = os.path.join(output_dir, 'done')
-    if not os.path.exists(done_file):
-        raise Exception("rtg exited 0 but %s does not exist. abort.\n" % done_file)
-
-def is_log_file(filename):
-    return re.search(r'^(.*/)?(progress|done|\S+.log)$', filename)
-
-setup()
diff --git a/crunch_scripts/rtg-fasta2sdf b/crunch_scripts/rtg-fasta2sdf
deleted file mode 100755 (executable)
index f1ef617..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-fasta_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'fasta',
-    decompress = False)
-fasta_files = filter(lambda f: f != '.locator', os.listdir(fasta_path))
-out_dir = os.path.join(arvados.current_task().tmpdir, 'ref-sdf')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-pyrtg.run_rtg('format', out_dir,
-              map(lambda f: os.path.join(fasta_path, f), fasta_files))
-
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-fastq2sdf b/crunch_scripts/rtg-fastq2sdf
deleted file mode 100755 (executable)
index e42697f..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-fastq_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'fastq')
-fastq_files = filter(lambda f: f != '.locator', os.listdir(fastq_path))
-tmp_dir_base = os.path.join(arvados.current_task().tmpdir, 'tmp')
-out_dir = os.path.join(arvados.current_task().tmpdir, 'reads')
-
-arvados.util.run_command(['rm', '-rf', tmp_dir_base], stderr=sys.stderr)
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-os.mkdir(tmp_dir_base)
-
-# convert fastq to sdf
-tmp_dirs = []
-for leftarm in fastq_files:
-    if re.search(r'_1.f(ast)?q(.gz)?$', leftarm):
-        rightarm = re.sub(r'_1(.f(ast)?q(.gz)?)$', '_2\\1', leftarm)
-        if rightarm in fastq_files:
-            tmp_dirs += ['%s/%08d' % (tmp_dir_base, len(tmp_dirs))]
-            pyrtg.run_rtg('format', tmp_dirs[-1],
-                          ['-f', 'fastq',
-                           '-q', 'sanger',
-                           '-l', os.path.join(fastq_path, leftarm),
-                           '-r', os.path.join(fastq_path, rightarm)])
-
-# split sdf
-pyrtg.run_rtg('sdfsplit', out_dir,
-              ['-n', '1500000'] + tmp_dirs)
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=1)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-map b/crunch_scripts/rtg-map
deleted file mode 100755 (executable)
index f740888..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-arvados.job_setup.one_task_per_input_stream(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-in_dir = os.path.join(this_task.tmpdir, 'input')
-arvados.util.run_command(['rm', '-rf', in_dir], stderr=sys.stderr)
-in_dir = arvados.util.stream_extract(
-    stream = arvados.StreamReader(this_task['parameters']['input']),
-    path = in_dir,
-    decompress = False)
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = False)
-
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-# map reads
-pyrtg.run_rtg('map', out_dir,
-              ['-i', in_dir,
-               '-t', ref_dir,
-               '-a', '2',
-               '-b', '1',
-               '--sam-rg', '@RG\\tID:NA\\tSM:NA\\tPL:ILLUMINA'])
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, this_task['parameters']['input'][0], 0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-snp b/crunch_scripts/rtg-snp
deleted file mode 100755 (executable)
index 1d8a605..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = False)
-input_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'input')
-bam_files = map(lambda f: os.path.join(input_dir, f),
-                filter(lambda f: re.search(r'^(.*/)?alignments.bam$', f),
-                       arvados.util.listdir_recursive(input_dir)))
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-# call sequence variants
-pyrtg.run_rtg('snp', out_dir,
-              ['-t', ref_dir] + bam_files)
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/run-command b/crunch_scripts/run-command
deleted file mode 100755 (executable)
index 3fd08bf..0000000
+++ /dev/null
@@ -1,458 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import logging
-
-logger = logging.getLogger('run-command')
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(logging.Formatter("run-command: %(message)s"))
-logger.addHandler(log_handler)
-logger.setLevel(logging.INFO)
-
-import arvados
-import re
-import os
-import subprocess
-import sys
-import shutil
-import crunchutil.subst as subst
-import time
-import arvados.commands.put as put
-import signal
-import stat
-import copy
-import traceback
-import pprint
-import multiprocessing
-import crunchutil.robust_put as robust_put
-import crunchutil.vwd as vwd
-import argparse
-import json
-import tempfile
-import errno
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--dry-run', action='store_true')
-parser.add_argument('--script-parameters', type=str, default="{}")
-args = parser.parse_args()
-
-os.umask(0077)
-
-if not args.dry_run:
-    api = arvados.api('v1')
-    t = arvados.current_task().tmpdir
-    os.chdir(arvados.current_task().tmpdir)
-    os.mkdir("tmpdir")
-    os.mkdir("output")
-
-    os.chdir("output")
-
-    outdir = os.getcwd()
-
-    taskp = None
-    jobp = arvados.current_job()['script_parameters']
-    if len(arvados.current_task()['parameters']) > 0:
-        taskp = arvados.current_task()['parameters']
-else:
-    outdir = "/tmp"
-    jobp = json.loads(args.script_parameters)
-    os.environ['JOB_UUID'] = 'zzzzz-8i9sb-1234567890abcde'
-    os.environ['TASK_UUID'] = 'zzzzz-ot0gb-1234567890abcde'
-    os.environ['CRUNCH_SRC'] = '/tmp/crunch-src'
-    if 'TASK_KEEPMOUNT' not in os.environ:
-        os.environ['TASK_KEEPMOUNT'] = '/keep'
-
-def sub_tmpdir(v):
-    return os.path.join(arvados.current_task().tmpdir, 'tmpdir')
-
-def sub_outdir(v):
-    return outdir
-
-def sub_cores(v):
-     return str(multiprocessing.cpu_count())
-
-def sub_jobid(v):
-     return os.environ['JOB_UUID']
-
-def sub_taskid(v):
-     return os.environ['TASK_UUID']
-
-def sub_jobsrc(v):
-     return os.environ['CRUNCH_SRC']
-
-subst.default_subs["task.tmpdir"] = sub_tmpdir
-subst.default_subs["task.outdir"] = sub_outdir
-subst.default_subs["job.srcdir"] = sub_jobsrc
-subst.default_subs["node.cores"] = sub_cores
-subst.default_subs["job.uuid"] = sub_jobid
-subst.default_subs["task.uuid"] = sub_taskid
-
-class SigHandler(object):
-    def __init__(self):
-        self.sig = None
-
-    def send_signal(self, subprocesses, signum):
-        for sp in subprocesses:
-            sp.send_signal(signum)
-        self.sig = signum
-
-# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
-def flatten(l, ltypes=(list, tuple)):
-    ltype = type(l)
-    l = list(l)
-    i = 0
-    while i < len(l):
-        while isinstance(l[i], ltypes):
-            if not l[i]:
-                l.pop(i)
-                i -= 1
-                break
-            else:
-                l[i:i + 1] = l[i]
-        i += 1
-    return ltype(l)
-
-def add_to_group(gr, match):
-    m = match.groups()
-    if m not in gr:
-        gr[m] = []
-    gr[m].append(match.group(0))
-
-class EvaluationError(Exception):
-    pass
-
-# Return the name of variable ('var') that will take on each value in 'items'
-# when performing an inner substitution
-def var_items(p, c, key):
-    if key not in c:
-        raise EvaluationError("'%s' was expected in 'p' but is missing" % key)
-
-    if "var" in c:
-        if not isinstance(c["var"], basestring):
-            raise EvaluationError("Value of 'var' must be a string")
-        # Var specifies the variable name for inner parameter substitution
-        return (c["var"], get_items(p, c[key]))
-    else:
-        # The component function ('key') value is a list, so return the list
-        # directly with no parameter selected.
-        if isinstance(c[key], list):
-            return (None, get_items(p, c[key]))
-        elif isinstance(c[key], basestring):
-            # check if c[key] is a string that looks like a parameter
-            m = re.match("^\$\((.*)\)$", c[key])
-            if m and m.group(1) in p:
-                return (m.group(1), get_items(p, c[key]))
-            else:
-                # backwards compatible, foreach specifies bare parameter name to use
-                return (c[key], get_items(p, p[c[key]]))
-        else:
-            raise EvaluationError("Value of '%s' must be a string or list" % key)
-
-# "p" is the parameter scope, "c" is the item to be expanded.
-# If "c" is a dict, apply function expansion.
-# If "c" is a list, recursively expand each item and return a new list.
-# If "c" is a string, apply parameter substitution
-def expand_item(p, c):
-    if isinstance(c, dict):
-        if "foreach" in c and "command" in c:
-            # Expand a command template for each item in the specified user
-            # parameter
-            var, items = var_items(p, c, "foreach")
-            if var is None:
-                raise EvaluationError("Must specify 'var' in foreach")
-            r = []
-            for i in items:
-                params = copy.copy(p)
-                params[var] = i
-                r.append(expand_item(params, c["command"]))
-            return r
-        elif "list" in c and "index" in c and "command" in c:
-            # extract a single item from a list
-            var, items = var_items(p, c, "list")
-            if var is None:
-                raise EvaluationError("Must specify 'var' in list")
-            params = copy.copy(p)
-            params[var] = items[int(c["index"])]
-            return expand_item(params, c["command"])
-        elif "regex" in c:
-            pattern = re.compile(c["regex"])
-            if "filter" in c:
-                # filter list so that it only includes items that match a
-                # regular expression
-                _, items = var_items(p, c, "filter")
-                return [i for i in items if pattern.match(i)]
-            elif "group" in c:
-                # generate a list of lists, where items are grouped on common
-                # subexpression match
-                _, items = var_items(p, c, "group")
-                groups = {}
-                for i in items:
-                    match = pattern.match(i)
-                    if match:
-                        add_to_group(groups, match)
-                return [groups[k] for k in groups]
-            elif "extract" in c:
-                # generate a list of lists, where items are split by
-                # subexpression match
-                _, items = var_items(p, c, "extract")
-                r = []
-                for i in items:
-                    match = pattern.match(i)
-                    if match:
-                        r.append(list(match.groups()))
-                return r
-        elif "batch" in c and "size" in c:
-            # generate a list of lists, where items are split into a batch size
-            _, items = var_items(p, c, "batch")
-            sz = int(c["size"])
-            r = []
-            for j in xrange(0, len(items), sz):
-                r.append(items[j:j+sz])
-            return r
-        raise EvaluationError("Missing valid list context function")
-    elif isinstance(c, list):
-        return [expand_item(p, arg) for arg in c]
-    elif isinstance(c, basestring):
-        m = re.match("^\$\((.*)\)$", c)
-        if m and m.group(1) in p:
-            return expand_item(p, p[m.group(1)])
-        else:
-            return subst.do_substitution(p, c)
-    else:
-        raise EvaluationError("expand_item() unexpected parameter type %s" % type(c))
-
-# Evaluate in a list context
-# "p" is the parameter scope, "value" will be evaluated
-# if "value" is a list after expansion, return that
-# if "value" is a path to a directory, return a list consisting of each entry in the directory
-# if "value" is a path to a file, return a list consisting of each line of the file
-def get_items(p, value):
-    value = expand_item(p, value)
-    if isinstance(value, list):
-        return value
-    elif isinstance(value, basestring):
-        mode = os.stat(value).st_mode
-        prefix = value[len(os.environ['TASK_KEEPMOUNT'])+1:]
-        if mode is not None:
-            if stat.S_ISDIR(mode):
-                items = [os.path.join(value, l) for l in os.listdir(value)]
-            elif stat.S_ISREG(mode):
-                with open(value) as f:
-                    items = [line.rstrip("\r\n") for line in f]
-            return items
-    raise EvaluationError("get_items did not yield a list")
-
-stdoutname = None
-stdoutfile = None
-stdinname = None
-stdinfile = None
-
-# Construct the cross product of all values of each variable listed in fvars
-def recursive_foreach(params, fvars):
-    var = fvars[0]
-    fvars = fvars[1:]
-    items = get_items(params, params[var])
-    logger.info("parallelizing on %s with items %s" % (var, items))
-    if items is not None:
-        for i in items:
-            params = copy.copy(params)
-            params[var] = i
-            if len(fvars) > 0:
-                recursive_foreach(params, fvars)
-            else:
-                if not args.dry_run:
-                    arvados.api().job_tasks().create(body={
-                        'job_uuid': arvados.current_job()['uuid'],
-                        'created_by_job_task_uuid': arvados.current_task()['uuid'],
-                        'sequence': 1,
-                        'parameters': params
-                    }).execute()
-                else:
-                    if isinstance(params["command"][0], list):
-                        for c in params["command"]:
-                            logger.info(flatten(expand_item(params, c)))
-                    else:
-                        logger.info(flatten(expand_item(params, params["command"])))
-    else:
-        logger.error("parameter %s with value %s in task.foreach yielded no items" % (var, params[var]))
-        sys.exit(1)
-
-try:
-    if "task.foreach" in jobp:
-        if args.dry_run or arvados.current_task()['sequence'] == 0:
-            # This is the first task to start the other tasks and exit
-            fvars = jobp["task.foreach"]
-            if isinstance(fvars, basestring):
-                fvars = [fvars]
-            if not isinstance(fvars, list) or len(fvars) == 0:
-                logger.error("value of task.foreach must be a string or non-empty list")
-                sys.exit(1)
-            recursive_foreach(jobp, jobp["task.foreach"])
-            if not args.dry_run:
-                if "task.vwd" in jobp:
-                    # Set output of the first task to the base vwd collection so it
-                    # will be merged with output fragments from the other tasks by
-                    # crunch.
-                    arvados.current_task().set_output(subst.do_substitution(jobp, jobp["task.vwd"]))
-                else:
-                    arvados.current_task().set_output(None)
-            sys.exit(0)
-    else:
-        # This is the only task so taskp/jobp are the same
-        taskp = jobp
-except Exception as e:
-    logger.exception("caught exception")
-    logger.error("job parameters were:")
-    logger.error(pprint.pformat(jobp))
-    sys.exit(1)
-
-try:
-    if not args.dry_run:
-        if "task.vwd" in taskp:
-            # Populate output directory with symlinks to files in collection
-            vwd.checkout(subst.do_substitution(taskp, taskp["task.vwd"]), outdir)
-
-        if "task.cwd" in taskp:
-            os.chdir(subst.do_substitution(taskp, taskp["task.cwd"]))
-
-    cmd = []
-    if isinstance(taskp["command"][0], list):
-        for c in taskp["command"]:
-            cmd.append(flatten(expand_item(taskp, c)))
-    else:
-        cmd.append(flatten(expand_item(taskp, taskp["command"])))
-
-    if "task.stdin" in taskp:
-        stdinname = subst.do_substitution(taskp, taskp["task.stdin"])
-        if not args.dry_run:
-            stdinfile = open(stdinname, "rb")
-
-    if "task.stdout" in taskp:
-        stdoutname = subst.do_substitution(taskp, taskp["task.stdout"])
-        if not args.dry_run:
-            stdoutfile = open(stdoutname, "wb")
-
-    if "task.env" in taskp:
-        env = copy.copy(os.environ)
-        for k,v in taskp["task.env"].items():
-            env[k] = subst.do_substitution(taskp, v)
-    else:
-        env = None
-
-    logger.info("{}{}{}".format(' | '.join([' '.join(c) for c in cmd]), (" < " + stdinname) if stdinname is not None else "", (" > " + stdoutname) if stdoutname is not None else ""))
-
-    if args.dry_run:
-        sys.exit(0)
-except subst.SubstitutionError as e:
-    logger.error(str(e))
-    logger.error("task parameters were:")
-    logger.error(pprint.pformat(taskp))
-    sys.exit(1)
-except Exception as e:
-    logger.exception("caught exception")
-    logger.error("task parameters were:")
-    logger.error(pprint.pformat(taskp))
-    sys.exit(1)
-
-# rcode holds the return codes produced by each subprocess
-rcode = {}
-try:
-    subprocesses = []
-    close_streams = []
-    if stdinfile:
-        close_streams.append(stdinfile)
-    next_stdin = stdinfile
-
-    for i in xrange(len(cmd)):
-        if i == len(cmd)-1:
-            # this is the last command in the pipeline, so its stdout should go to stdoutfile
-            next_stdout = stdoutfile
-        else:
-            # this is an intermediate command in the pipeline, so its stdout should go to a pipe
-            next_stdout = subprocess.PIPE
-
-        sp = subprocess.Popen(cmd[i], shell=False, stdin=next_stdin, stdout=next_stdout, env=env)
-
-        # Need to close the FDs on our side so that subcommands will get SIGPIPE if the
-        # consuming process ends prematurely.
-        if sp.stdout:
-            close_streams.append(sp.stdout)
-
-        # Send this process's stdout to the next process's stdin
-        next_stdin = sp.stdout
-
-        subprocesses.append(sp)
-
-    # File descriptors have been handed off to the subprocesses, so close them here.
-    for s in close_streams:
-        s.close()
-
-    # Set up signal handling
-    sig = SigHandler()
-
-    # Forward terminate signals to the subprocesses.
-    signal.signal(signal.SIGINT, lambda signum, frame: sig.send_signal(subprocesses, signum))
-    signal.signal(signal.SIGTERM, lambda signum, frame: sig.send_signal(subprocesses, signum))
-    signal.signal(signal.SIGQUIT, lambda signum, frame: sig.send_signal(subprocesses, signum))
-
-    active = 1
-    pids = set([s.pid for s in subprocesses])
-    while len(pids) > 0:
-        try:
-            (pid, status) = os.wait()
-        except OSError as e:
-            if e.errno == errno.EINTR:
-                pass
-            else:
-                raise
-        else:
-            pids.discard(pid)
-            if not taskp.get("task.ignore_rcode"):
-                rcode[pid] = (status >> 8)
-            else:
-                rcode[pid] = 0
-
-    if sig.sig is not None:
-        logger.critical("terminating on signal %s" % sig.sig)
-        sys.exit(2)
-    else:
-        for i in xrange(len(cmd)):
-            r = rcode[subprocesses[i].pid]
-            logger.info("%s completed with exit code %i (%s)" % (cmd[i][0], r, "success" if r == 0 else "failed"))
-
-except Exception as e:
-    logger.exception("caught exception")
-
-# restore default signal handlers.
-signal.signal(signal.SIGINT, signal.SIG_DFL)
-signal.signal(signal.SIGTERM, signal.SIG_DFL)
-signal.signal(signal.SIGQUIT, signal.SIG_DFL)
-
-logger.info("the following output files will be saved to keep:")
-
-subprocess.call(["find", "-L", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr, cwd=outdir)
-
-logger.info("start writing output to keep")
-
-if "task.vwd" in taskp and "task.foreach" in jobp:
-    for root, dirs, files in os.walk(outdir):
-        for f in files:
-            s = os.lstat(os.path.join(root, f))
-            if stat.S_ISLNK(s.st_mode):
-                os.unlink(os.path.join(root, f))
-
-(outcollection, checkin_error) = vwd.checkin(outdir)
-
-# Success if we ran any subprocess, and they all exited 0.
-success = rcode and all(status == 0 for status in rcode.itervalues()) and not checkin_error
-
-api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                     body={
-                                         'output': outcollection.manifest_text(),
-                                         'success': success,
-                                         'progress':1.0
-                                     }).execute()
-
-sys.exit(0 if success else 1)
diff --git a/crunch_scripts/split-fastq.py b/crunch_scripts/split-fastq.py
deleted file mode 100755 (executable)
index 61c384f..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import hashlib
-import string
-import sys   # used below for sys.stderr and sys.exit
-
-api = arvados.api('v1')
-
-piece = 0
-manifest_text = ""
-
-# Look for paired reads
-
-inp = arvados.CollectionReader(arvados.getjobparam('reads'))
-
-manifest_list = []
-
-def nextline(reader, start):
-    n = -1
-    while True:
-        r = reader.readfrom(start, 128)
-        if r == '':
-            break
-        n = string.find(r, "\n")
-        if n > -1:
-            break
-        else:
-            start += 128
-    return n
-
-prog = re.compile(r'(.*?)(_[12])?\.fastq(\.gz)?$')
-
-# Look for fastq files
-for s in inp.all_streams():
-    for f in s.all_files():
-        name_pieces = prog.match(f.name())
-        if name_pieces is not None:
-            if s.name() != ".":
-                # The downstream tool (run-command) only iterates over the top
-                # level of directories so if there are fastq files in
-                # directories in the input, the choice is either to forget
-                # there are directories (which might lead to name conflicts) or
-                # just fail.
-                print >>sys.stderr, "fastq must be at the root of the collection"
-                sys.exit(1)
-
-            p = None
-            if name_pieces.group(2) is not None:
-                if name_pieces.group(2) == "_1":
-                    p = [{}, {}]
-                    p[0]["reader"] = s.files()[name_pieces.group(0)]
-                    p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + (name_pieces.group(3) if name_pieces.group(3) else '')]
-            else:
-                p = [{}]
-                p[0]["reader"] = s.files()[name_pieces.group(0)]
-
-            if p is not None:
-                for i in xrange(0, len(p)):
-                    m = p[i]["reader"].as_manifest().split()
-                    m[0] = "./_" + str(piece)
-                    manifest_list.append(m)
-                piece += 1
-
-manifest_text = "\n".join(" ".join(m) for m in manifest_list) + "\n"
-
-arvados.current_task().set_output(manifest_text)
diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
deleted file mode 100755 (executable)
index 8b2c7ce..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados.crunch
-import hashlib
-import os
-
-out = arvados.crunch.TaskOutputDir()
-
-string = open(__file__).read()
-with open(os.path.join(out.path, 'example.out'), 'w') as f:
-    f.write(string)
-with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
-    f.write(hashlib.sha1(string).hexdigest() + "\n")
-
-arvados.current_task().set_output(out.manifest_text())
index 6b572f7fcc4ddbffa4c7903bfa700aa3f45e6882..0547c8ee9366f6788fbe87b34c5b319638a0734f 100644 (file)
@@ -67,18 +67,6 @@ navbar:
       - user/copying/agpl-3.0.html
       - user/copying/LICENSE-2.0.html
       - user/copying/by-sa-3.0.html
-    - Obsolete documentation:
-      - user/topics/running-pipeline-command-line.html.textile.liquid
-      - user/topics/arv-run.html.textile.liquid
-      - user/tutorials/running-external-program.html.textile.liquid
-      - user/topics/crunch-tools-overview.html.textile.liquid
-      - user/tutorials/tutorial-firstscript.html.textile.liquid
-      - user/tutorials/tutorial-submit-job.html.textile.liquid
-      - user/topics/tutorial-parallel.html.textile.liquid
-      - user/topics/run-command.html.textile.liquid
-      - user/reference/job-pipeline-ref.html.textile.liquid
-      - user/examples/crunch-examples.html.textile.liquid
-      - user/topics/tutorial-trait-search.html.textile.liquid
   sdk:
     - Overview:
       - sdk/index.html.textile.liquid
@@ -89,7 +77,6 @@ navbar:
       - sdk/python/arvados-fuse.html.textile.liquid
       - sdk/python/events.html.textile.liquid
       - sdk/python/cookbook.html.textile.liquid
-      - sdk/python/crunch-utility-libraries.html.textile.liquid
     - CLI:
       - sdk/cli/install.html.textile.liquid
       - sdk/cli/index.html.textile.liquid
@@ -141,13 +128,13 @@ navbar:
       - api/methods/container_requests.html.textile.liquid
       - api/methods/containers.html.textile.liquid
       - api/methods/workflows.html.textile.liquid
-    - Jobs engine (deprecated):
+    - Jobs engine (legacy):
       - api/crunch-scripts.html.textile.liquid
       - api/methods/jobs.html.textile.liquid
       - api/methods/job_tasks.html.textile.liquid
       - api/methods/pipeline_instances.html.textile.liquid
       - api/methods/pipeline_templates.html.textile.liquid
-    - Metadata for bioinformatics:
+    - Metadata for bioinformatics (deprecated):
       - api/methods/humans.html.textile.liquid
       - api/methods/specimens.html.textile.liquid
       - api/methods/traits.html.textile.liquid
@@ -228,8 +215,5 @@ navbar:
       - install/crunch2-slurm/install-test.html.textile.liquid
       - install/install-nodemanager.html.textile.liquid
       - install/install-compute-ping.html.textile.liquid
-    - Containers API support on cloud (experimental):
+    - Containers API support on cloud (beta):
       - install/install-dispatch-cloud.html.textile.liquid
-    - Jobs API support (deprecated):
-      - install/install-crunch-dispatch.html.textile.liquid
-      - install/install-compute-node.html.textile.liquid
diff --git a/doc/_includes/_arv_run_redirection.liquid b/doc/_includes/_arv_run_redirection.liquid
deleted file mode 100644 (file)
index 663de0b..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
-$ <span class="userinput">ls *.fastq</span>
-$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> output.txt</span>
-[...]
- 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > output.txt
- 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq > output.txt
- 2 stderr run-command: completed with exit code 0 (success)
- 2 stderr run-command: the following output files will be saved to keep:
- 2 stderr run-command: 121 ./output.txt
- 2 stderr run-command: start writing output to keep
- 1 stderr run-command: completed with exit code 0 (success)
- 1 stderr run-command: the following output files will be saved to keep:
- 1 stderr run-command: 363 ./output.txt
- 1 stderr run-command: start writing output to keep
- 2 stderr upload wrote 121 total 121
- 1 stderr upload wrote 363 total 363
-[..]
-</pre>
-</notextile>
index 7ea1b10fae1492679fd942ade2a2d22c51ea512e..28f08db4dd3ffbaf41c2385c59041010b22f22cb 100644 (file)
@@ -39,7 +39,17 @@ table(table table-bordered table-condensed).
 |"v1.1.4":#v1_1_4|"v1.1.3":#v1_1_3|"v1.1.2":#v1_1_2|"v1.1.1":#v1_1_1|"v1.1.0":#v1_1_0|
 |\5. "older":#older|
 
-h3(#master). development master (as of 2019-06-07)
+h3(#master). development master (as of 2019-08-12)
+
+h4. Keep-web no longer supports configuration via command line flags
+
+As part of the migration to a central cluster configuration file, the previously deprecated mechanism of configuring keep-web through environment variables and command line flags is no longer supported. Keep-web now accepts both the legacy @keep-web.yml@ config format (used by Arvados 1.4) and the new cluster config file format. Please check "keep-web's install guide":{{site.baseurl}}/install/install-keep-web.html for more details.
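+
+For reference, a minimal cluster config sketch for keep-web (the host name, port, and @uuid_prefix@ below are placeholders, not defaults; see the install guide for the full set of options):
+
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Services:
+      WebDAV:
+        InternalURLs:
+          "http://keep_web_hostname_goes_here:9002/": {}
+        ExternalURL: "https://collections.<span class="userinput">uuid_prefix</span>.your.domain"
+</code></pre>
+</notextile>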
+
+h4. Jobs API is read-only
+
+(task "#15133":https://dev.arvados.org/issues/15133 ) The legacy 'jobs' API is now read-only.  It has long been superceded by containers / container_requests (aka crunch v2).  Arvados installations since the end of 2017 (v1.1.0) have probably only used containers, and are unaffected by this change.
+
+So that older Arvados sites don't lose access to legacy records, the API has been converted to read-only.  Creating and updating jobs (and the related types job_task, pipeline_template and pipeline_instance) is disabled, and much of the related business logic has been removed, along with other code specific to the jobs API.  Specifically, the following programs associated with the jobs API have been removed: @crunch-dispatch.rb@, @crunch-job@, @crunchrunner@, @arv-run-pipeline-instance@ and @arv-run@.
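+
+As an illustrative sketch (commands only, not output; the job body fields are borrowed from the old tutorial examples), a site can confirm the new behavior with the CLI: reading legacy job records still works, while create requests are rejected by the API server:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv job list</span>
+# reading legacy job records still works
+~$ <span class="userinput">arv job create --job '{"script":"hash","repository":"arvados","script_version":"master","script_parameters":{}}'</span>
+# rejected: the jobs API no longer accepts create or update requests
+</code></pre>
+</notextile>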
 
 h4. Keepproxy configuration migration
 
@@ -47,7 +57,7 @@ Keepproxy can now be configured using the centralized config at @/etc/arvados/co
 
 h4. No longer stripping ':' from strings in serialized database columns
 
- (bug #15311) Strings read from serialized columns in the database with a leading ':' would have the ':' stripped after loading the record.  This behavior existed due to legacy serialization behavior which stored Ruby symbols with a leading ':'.  Unfortunately this corrupted fields where the leading ":" was intentional.  This behavior has been removed.
+(bug "#15311":https://dev.arvados.org/issues/15311 ) Strings read from serialized columns in the database with a leading ':' would have the ':' stripped after loading the record.  This behavior existed due to legacy serialization behavior which stored Ruby symbols with a leading ':'.  Unfortunately this corrupted fields where the leading ":" was intentional.  This behavior has been removed.
 
 You can test if any records in your database are affected by going to the API server directory and running @bundle exec rake symbols:check@.  This will report which records contain fields with a leading ':' that would previously have been stripped.  If there are records to be updated, you can update the database using @bundle exec rake symbols:stringify@.
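+
+For example, a typical sequence on the API server host (a sketch assuming the standard @/var/www/arvados-api/current@ install path used elsewhere in this documentation):
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd /var/www/arvados-api/current</span>
+~$ <span class="userinput">bundle exec rake symbols:check</span>
+# only needed if the check above reports affected records:
+~$ <span class="userinput">bundle exec rake symbols:stringify</span>
+</code></pre>
+</notextile>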
 
index 3df1db407677add56ff9303e7a5179c8ed0b82b0..86a22ed6cb2fc4464467172d0cca95f8e55c2708 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Legacy.  The job APIs are read-only and disabled by default in new installations.  Use "container requests":methods/container_requests.html instead.*
+
 h2. Crunch scripts
 
 A crunch script is responsible for completing a single JobTask. In doing so, it will:
index cada9ab1b88ac226231633a8a3b43f56cf735a5b..74942ba55fcebbc03d26d4a59296f6fb14dbebf8 100644 (file)
@@ -49,10 +49,3 @@ For example, for a 3.75 GiB node, default Keep cache, and no extra RAM reserved:
     (3840 * 0.95) - 256 = 3392 MiB
 
 To run on this instance type, the container can request at most 3392 MiB of working RAM.
-
-h2. Job API (deprecated)
-
-# To submit work, create a "job":{{site.baseurl}}/api/methods/jobs.html .  If the same job has been submitted in the past, it will return an existing job in @Completed@ state.
-# The dispatcher process will notice a new job in @Queued@ state and attempt to allocate nodes to run the job.
-# The job executes.
-# Retrieve the @output@ field with the portable data hash of the collection with the output files of the job.
index 84cb22c99738755dded5f6cea7b35cf02ecfdf2f..e08e941cf66f68a21c0d6b765f09ddbec6faab11 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Deprecated, likely to be removed in a future version.  The recommended way to store metadata is "collection properties":collections.html .*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/humans@
 
 Object type: @7a9it@
index deee3a58069178c5cec23c3e7c9d188461140532..69c3f07e3006a29ae3f64f7d6540aaf95343561d 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Legacy.  This endpoint is read-only and disabled by default in new installations.*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/job_tasks@
 
 Object type: @ot0gb@
index 2f061866f95946b41c66b5911372a7aded620f32..8b06c989a6604cd65b2c13f2b86a69abfa316457 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Legacy.  This endpoint is read-only and disabled by default in new installations.*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/jobs@
 
 Object type: @8i9sb@
@@ -19,8 +21,6 @@ Example UUID: @zzzzz-8i9sb-0123456789abcde@
 
 h2. Resource
 
-Deprecated.
-
 A job describes a work order to be executed by the Arvados cluster.
 
 Each job has, in addition to the "Common resource fields":{{site.baseurl}}/api/resources.html:
index 09fd4fe48189d3ceea5ccd37fdabe631724aecb3..56c071ef9b8ef1c13c03f2aea112a879cd193d1f 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Legacy.  This endpoint is read-only and disabled by default in new installations.*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/pipeline_instances@
 
 Object type: @d1hrv@
index 85df2799e421e31ea9372a77f58c38f311913939..40297aa05199b77ac317b8afc94843961b03702d 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Legacy.  This endpoint is read-only and disabled by default in new installations.*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/pipeline_templates@
 
 Object type: @p5p6p@
index 6ee79ca3df2d543da2d82c7ccbf72c78d1bfa129..be3712a2064cdd174150769078ccf5eaf5c5d8a6 100644 (file)
@@ -10,6 +10,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Deprecated, likely to be removed in a future version.  The recommended way to store metadata is "collection properties":collections.html .*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/specimens@
 
 Object type: @j58dm@
index 34b60cfe7e143b307372d6ac9c1c5994e317f64e..e48804702eac9fd330d2431cf8e189a01188357d 100644 (file)
@@ -11,6 +11,8 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
+p=. *Deprecated, likely to be removed in a future version.  The recommended way to store metadata is "collection properties":collections.html .*
+
 API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/traits@
 
 Object type: @q1cn2@
diff --git a/doc/install/install-compute-node.html.textile.liquid b/doc/install/install-compute-node.html.textile.liquid
deleted file mode 100644 (file)
index 02eb216..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Install a compute node
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-h2. Install dependencies
-
-First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
-
-{% include 'note_python_sc' %}
-
-On Red Hat-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">echo 'exclude=python2-llfuse' | sudo tee -a /etc/yum.conf</span>
-~$ <span class="userinput">sudo yum install perl python-virtualenv fuse python-arvados-python-client python-arvados-fuse crunchrunner crunchstat arvados-docker-cleaner iptables ca-certificates</span>
-</code></pre>
-</notextile>
-
-On Debian-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install perl python-virtualenv fuse python-arvados-python-client python-arvados-fuse crunchrunner crunchstat arvados-docker-cleaner iptables ca-certificates</span>
-</code></pre>
-</notextile>
-
-{% include 'install_compute_docker' %}
-
-h2. Set up SLURM
-
-Install SLURM following "the same process you used to install the Crunch dispatcher":install-crunch-dispatch.html#slurm.
-
-h2. Copy configuration files from the dispatcher (API server)
-
-The @slurm.conf@ and @/etc/munge/munge.key@ files need to be identical across the dispatcher and all compute nodes. Copy the files you created in the "Install the Crunch dispatcher":install-crunch-dispatch.html step to this compute node.
-
-{% include 'install_compute_fuse' %}
-
-{% include 'install_docker_cleaner' %}
-
-h2. Add a Crunch user account
-
-Create a Crunch user account, and add it to the @fuse@ and @docker@ groups so it can use those tools:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo useradd --groups fuse,docker crunch</span>
-</code></pre>
-</notextile>
-
-The crunch user should have the same UID, GID, and home directory across all compute nodes and the dispatcher (API server).
-
-h2. Tell the API server about this compute node
-
-Load your API superuser token on the compute node:
-
-<notextile>
-<pre><code>
-~$ <span class="userinput">HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'</span>
-~$ <span class="userinput">export ARVADOS_API_TOKEN=@your-superuser-token@</span>
-~$ <span class="userinput">export ARVADOS_API_HOST=@uuid_prefix.your.domain@</span>
-~$ <span class="userinput">unset ARVADOS_API_HOST_INSECURE</span>
-</code>
-</pre>
-</notextile>
-
-Then execute this script to create a compute node object, and set up a cron job to have the compute node ping the API server every five minutes:
-
-<notextile>
-<pre><code>
-#!/bin/bash
-set -e
-if ! test -f /root/node.json ; then
-    python - &lt;&lt;EOF
-import arvados, json, socket
-fqdn = socket.getfqdn()
-hostname, _, domain = fqdn.partition('.')
-node = arvados.api('v1').nodes().create(body={'hostname': hostname, 'domain': domain}).execute()
-with open('/root/node.json', 'w') as node_file:
-    json.dump(node, node_file, indent=2)
-EOF
-
-    # Make sure /dev/fuse permissions are correct (the device appears after fuse is loaded)
-    chmod 1660 /dev/fuse && chgrp fuse /dev/fuse
-fi
-
-UUID=`grep \"uuid\" /root/node.json  |cut -f4 -d\"`
-PING_SECRET=`grep \"ping_secret\" /root/node.json  |cut -f4 -d\"`
-
-if ! test -f /etc/cron.d/node_ping ; then
-    echo "*/5 * * * * root /usr/bin/curl -k -d ping_secret=$PING_SECRET https://$ARVADOS_API_HOST/arvados/v1/nodes/$UUID/ping" > /etc/cron.d/node_ping
-fi
-
-/usr/bin/curl -k -d ping_secret=$PING_SECRET https://$ARVADOS_API_HOST/arvados/v1/nodes/$UUID/ping?ping_secret=$PING_SECRET
-</code>
-</pre>
-</notextile>
-
-And remove your token from the environment:
-
-<notextile>
-<pre><code>
-~$ <span class="userinput">unset ARVADOS_API_TOKEN</span>
-~$ <span class="userinput">unset ARVADOS_API_HOST</span>
-</code>
-</pre>
-</notextile>
diff --git a/doc/install/install-crunch-dispatch.html.textile.liquid b/doc/install/install-crunch-dispatch.html.textile.liquid
deleted file mode 100644 (file)
index e0ed147..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Install the Crunch dispatcher
-
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-The dispatcher normally runs on the same host/VM as the API server.
-
-h2. Test the Arvados job queue
-
-Crunch dispatches work from the job queue on the Arvados API server.  Before you start installing the Crunch dispatcher, now's a good time to check that the API server and Git server can coordinate to create job records.  Run these commands *on your shell server* to create a collection, and a job to calculate the MD5 checksum of every file in it:
-
-<notextile>
-<pre><code>~$ <span class="userinput">echo 'Hello, Crunch!' | arv-put --portable-data-hash -</span>
-&hellip;
-d40c7f35d80da669afb9db1896e760ad+49
-~$ <span class="userinput">read -rd $'\000' newjob &lt;&lt;EOF; arv job create --job "$newjob"
-{"script_parameters":{"input":"d40c7f35d80da669afb9db1896e760ad+49"},
- "script_version":"0988acb472849dc0",
- "script":"hash",
- "repository":"arvados"}
-EOF</span>
-</code></pre>
-</notextile>
-
-If you get the error
-
-<pre>
-ArgumentError: Specified script_version does not resolve to a commit
-</pre>
-
-it often means that the API server can't read the specified repository&mdash;either because it doesn't exist, or because the user running the API server doesn't have permission to read the repository files.  Check the API server's log (@/var/www/arvados-api/current/log/production.log@) for details, and double-check the instructions in the "Git server installation guide":install-arv-git-httpd.html.
-
-If everything goes well, the API server should create a job record, and your @arv@ command will output the JSON for that record.  It should have state @Queued@ and script_version @0988acb472849dc08d576ee40493e70bde2132ca@.  If the job JSON includes those fields, you can proceed to install the Crunch dispatcher and a compute node.  This job will remain queued until you install those services.
-
-h2. Perl SDK dependencies
-
-Install the Perl SDK on the controller.
-
-* See "Perl SDK":{{site.baseurl}}/sdk/perl/index.html page for details.
-
-h2. Python SDK dependencies
-
-Install the Python SDK and CLI tools on controller and all compute nodes.
-
-* See "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html page for details.
-
-h2(#slurm). Set up SLURM
-
-On the API server, install SLURM and munge, and generate a munge key.
-
-On Debian-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo /usr/bin/apt-get install slurm-llnl munge</span>
-~$ <span class="userinput">sudo /usr/sbin/create-munge-key</span>
-</code></pre>
-</notextile>
-
-On Red Hat-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo yum install slurm munge slurm-munge</span>
-</code></pre>
-</notextile>
-
-Now we need to give SLURM a configuration file.  On Debian-based systems, this is installed at @/etc/slurm-llnl/slurm.conf@.  On Red Hat-based systems, this is installed at @/etc/slurm/slurm.conf@.  Here's an example @slurm.conf@:
-
-<notextile>
-<pre>
-ControlMachine=uuid_prefix.your.domain
-SlurmctldPort=6817
-SlurmdPort=6818
-AuthType=auth/munge
-StateSaveLocation=/tmp
-SlurmdSpoolDir=/tmp/slurmd
-SwitchType=switch/none
-MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
-ProctrackType=proctrack/pgid
-CacheGroups=0
-ReturnToService=2
-TaskPlugin=task/affinity
-#
-# TIMERS
-SlurmctldTimeout=300
-SlurmdTimeout=300
-InactiveLimit=0
-MinJobAge=300
-KillWait=30
-Waittime=0
-#
-# SCHEDULING
-SchedulerType=sched/backfill
-SchedulerPort=7321
-SelectType=select/linear
-FastSchedule=0
-#
-# LOGGING
-SlurmctldDebug=3
-#SlurmctldLogFile=
-SlurmdDebug=3
-#SlurmdLogFile=
-JobCompType=jobcomp/none
-#JobCompLoc=
-JobAcctGatherType=jobacct_gather/none
-#
-# COMPUTE NODES
-NodeName=DEFAULT
-PartitionName=DEFAULT MaxTime=INFINITE State=UP
-
-NodeName=compute[0-255]
-PartitionName=compute Nodes=compute[0-255] Default=YES Shared=YES
-</pre>
-</notextile>
-
-h3. SLURM configuration essentials
-
-Whenever you change this file, you will need to update the copy _on every compute node_ as well as the controller node, and then run @sudo scontrol reconfigure@.
-
-*@ControlMachine@* should be a DNS name that resolves to the SLURM controller (dispatch/API server). This must resolve correctly on all SLURM worker nodes as well as the controller itself. In general SLURM is very sensitive about all of the nodes being able to communicate with the controller _and one another_, all using the same DNS names.
-
-*@NodeName=compute[0-255]@* establishes that the hostnames of the worker nodes will be compute0, compute1, etc. through compute255.
-* There are several ways to compress sequences of names, like @compute[0-9,80,100-110]@. See the "hostlist" discussion in the @slurm.conf(5)@ and @scontrol(1)@ man pages for more information.
-* It is not necessary for all of the nodes listed here to be alive in order for SLURM to work, although you should make sure the DNS entries exist. It is easiest to define lots of hostnames up front, assigning them to real nodes and updating your DNS records as the nodes appear. This minimizes the frequency of @slurm.conf@ updates and use of @scontrol reconfigure@.
-
-Each hostname in @slurm.conf@ must also resolve correctly on all SLURM worker nodes as well as the controller itself. Furthermore, the hostnames used in the configuration file must match the hostnames reported by @hostname@ or @hostname -s@ on the nodes themselves. This applies to the ControlMachine as well as the worker nodes.
-
-For example:
-* In @slurm.conf@ on control and worker nodes: @ControlMachine=uuid_prefix.your.domain@
-* In @slurm.conf@ on control and worker nodes: @NodeName=compute[0-255]@
-* In @/etc/resolv.conf@ on control and worker nodes: @search uuid_prefix.your.domain@
-* On the control node: @hostname@ reports @uuid_prefix.your.domain@
-* On worker node 123: @hostname@ reports @compute123.uuid_prefix.your.domain@
-
-h3. Automatic hostname assignment
-
-If your worker node bootstrapping script (see "Installing a compute node":install-compute-node.html) does not send the worker's current hostname, the API server will choose an unused hostname from the set given in @application.yml@, which defaults to @compute[0-255]@.
-
-If it is not feasible to give your compute nodes hostnames like compute0, compute1, etc., you can accommodate other naming schemes with a bit of extra configuration.
-
-If you want Arvados to assign names to your nodes with a different consecutive numeric series like @{worker1-0000, worker1-0001, worker1-0002}@, add an entry to @application.yml@; see @/var/www/arvados-api/current/config/application.default.yml@ for details. Example:
-* In @application.yml@: <code>assign_node_hostname: worker1-%<slot_number>04d</code>
-* In @slurm.conf@: <code>NodeName=worker1-[0000-0255]</code>
-
-If your worker hostnames are already assigned by other means, and the full set of names is known in advance, have your worker node bootstrapping script (see "Installing a compute node":install-compute-node.html) send its current hostname, rather than expect Arvados to assign one.
-* In @application.yml@: <code>assign_node_hostname: false</code>
-* In @slurm.conf@: <code>NodeName=alice,bob,clay,darlene</code>
-
-If your worker hostnames are already assigned by other means, but the full set of names is _not_ known in advance, you can use the @slurm.conf@ and @application.yml@ settings in the previous example, but you must also update @slurm.conf@ (both on the controller and on all worker nodes) and run @sudo scontrol reconfigure@ whenever a new node comes online.
-
-h2. Enable SLURM job dispatch
-
-In your API server's @application.yml@ configuration file, add the line @crunch_job_wrapper: :slurm_immediate@ under the appropriate section.  (The second colon is not a typo.  It denotes a Ruby symbol.)
-
-h2. Crunch user account
-
-Run @sudo adduser crunch@.  The crunch user should have the same UID, GID, and home directory on all compute nodes and on the dispatcher (API server).
-
-h2. Run the Crunch dispatcher service
-
-To dispatch Arvados jobs:
-
-* The API server script @crunch-dispatch.rb@ must be running.
-* @crunch-job@ needs the installation path of the Perl SDK in its @PERLLIB@.
-* @crunch-job@ needs the @ARVADOS_API_HOST@ (and, if necessary, @ARVADOS_API_HOST_INSECURE@) environment variable set.
-
-Install runit to monitor the Crunch dispatch daemon.  {% include 'install_runit' %}
-
-Install the script below as the run script for the Crunch dispatch service, modifying it as directed by the comments.
-
-<notextile>
-<pre><code>#!/bin/sh
-set -e
-
-rvmexec=""
-## Uncomment this line if you use RVM:
-#rvmexec="/usr/local/rvm/bin/rvm-exec default"
-
-export ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span>
-export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
-export HOME=$(pwd)
-export RAILS_ENV=production
-
-## Uncomment and edit this line if your compute nodes have cgroup info
-## somewhere other than /sys/fs/cgroup (e.g., "/cgroup" for CentOS 7)
-#export CRUNCH_CGROUP_ROOT="/sys/fs/cgroup"
-
-## Uncomment this line if your cluster uses self-signed SSL certificates:
-#export ARVADOS_API_HOST_INSECURE=yes
-
-# This is the path to docker on your compute nodes. You might need to
-# change it to "docker", "/opt/bin/docker", etc.
-export CRUNCH_JOB_DOCKER_BIN=<span class="userinput">docker.io</span>
-
-fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
-cd /var/www/arvados-api/current
-exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
-</code></pre>
-</notextile>
index 2991d7b0dc2df95aef92aaf191856067d3362adf..902ced0372a972449db08452b59c888a47f1d237 100644 (file)
@@ -45,40 +45,16 @@ Verify that @Keep-web@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keep-web -h</span>
 Usage of keep-web:
-  -allow-anonymous
-        Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection. (default false)
-  -attachment-only-host string
-        Accept credentials, and add "Content-Disposition: attachment" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or TLS.
-  -listen string
-        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
-  -trust-all-content
-        Serve non-public content from a single origin. Dangerous: read docs before using!
+  -config file
+       Site configuration file (default may be overridden by setting an ARVADOS_CONFIG environment variable) (default "/etc/arvados/config.yml")
+  -dump-config
+       write current configuration to stdout and exit
+[...]
+  -version
+       print version information and exit.
 </code></pre>
 </notextile>
 
-{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
-{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
-If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another. {% include 'install_rails_command' %}
-
-Install runit to supervise the Keep-web daemon.  {% include 'install_runit' %}
-
-The basic command to start Keep-web in the service run script is:
-
-<notextile>
-<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-export ARVADOS_API_TOKEN="<span class="userinput">{{railsout}}</span>"
-exec sudo -u nobody keep-web \
- -listen=<span class="userinput">:9002</span> \
- -attachment-only-host=<span class="userinput">download.uuid_prefix.your.domain</span> \
- -allow-anonymous \
- 2&gt;&amp;1
-</code></pre>
-</notextile>
-
-Omit the @-allow-anonymous@ argument if you do not want to serve public data.
-
-Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's TLS certificate is not signed by a recognized CA.
-
 h3. Set up a reverse proxy with TLS support
 
 The Keep-web service will be accessible from anywhere on the internet, so we recommend using TLS for transport encryption.
@@ -134,24 +110,99 @@ Configure your DNS servers so the following names resolve to your Nginx proxy's
 
 If neither of the above wildcard options is feasible, you have two choices:
 # Serve web content at @collections.uuid_prefix.your.domain@, but only for unauthenticated requests (public data and collection sharing links). Authenticated requests will always result in file downloads, using the @download@ name. For example, the Workbench "preview" button and the "view entire log file" link will invoke file downloads instead of displaying content in the browser window.
-# In the special case where you know you are immune to XSS exploits, you can enable the "trust all content" mode in Keep-web (with the @-trust-all-content@ command line flag) and Workbench (with the @trust_all_content@ item in @application.yml@). With both of these enabled, inline web content can be served from a single @collections@ host name; no wildcard DNS or certificate is needed. Do not do this without understanding the security implications described in the "Keep-web documentation":http://godoc.org/github.com/curoverse/arvados/services/keep-web.
+# In the special case where you know you are immune to XSS exploits, you can enable the "trust all content" mode in Keep-web and Workbench (by setting @Collections.TrustAllContent: true@ in the config file). With this enabled, inline web content can be served from a single @collections@ host name; no wildcard DNS or certificate is needed. Do not do this without understanding the security implications described in the "Keep-web documentation":http://godoc.org/github.com/curoverse/arvados/services/keep-web.
 
-h3. Tell Workbench about the Keep-web service
+h2. Configure Keep-web
+
+{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
+{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another. {% include 'install_rails_command' %}
+
+Edit the cluster config file to include the following:
+
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Services:
+      Controller:
+        ExternalURL: "https://<span class="userinput">uuid_prefix</span>.your.domain"
+      WebDAV:
+        InternalURLs:
+          "http://keep_web_hostname_goes_here:9002/": {}
+        ExternalURL: "https://collections.<span class="userinput">uuid_prefix</span>.your.domain"
+      WebDAVDownload:
+        InternalURLs:
+          "http://keep_web_hostname_goes_here:9002/": {}
+        ExternalURL: "https://download.<span class="userinput">uuid_prefix</span>.your.domain"
+    Users:
+      AnonymousUserToken: "{{railsout}}"
+    Collections:
+      TrustAllContent: false
+    TLS:
+      Insecure: false
+</code></pre>
+</notextile>
+
+Set @Users.AnonymousUserToken: ""@ (empty string) if you do not want to serve public data.
+
+Set @TLS.Insecure: true@ if your API server's TLS certificate is not signed by a recognized CA.
 
 Workbench has features like "download file from collection" and "show image" which work better if the content is served by Keep-web rather than Workbench itself. We recommend using the two different hostnames ("download" and "collections" above) for file downloads and inline content respectively.
 
-Add the following entry to your Workbench configuration file (@/etc/arvados/workbench/application.yml@). This URL will be used for file downloads.
+The following entry in your cluster configuration file (@/etc/arvados/config.yml@) specifies the URL that will be used for file downloads.
 
 <notextile>
-<pre><code>keep_web_download_url: https://download.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Services:
+      WebDAVDownload:
+        ExternalURL: "https://download.<span class="userinput">uuid_prefix</span>.your.domain"
 </code></pre>
 </notextile>
 
-Additionally, add *one* of the following entries to your Workbench configuration file, depending on your DNS setup. This URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
+Additionally, one of the following entries in your cluster configuration file (depending on your DNS setup) tells Workbench which URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
+
+<notextile>
+<pre><code>Clusters:
+  <span class="userinput">uuid_prefix</span>:
+    Services:
+      WebDAV:
+        ExternalURL: "https://*--collections.<span class="userinput">uuid_prefix</span>.your.domain"
+        ExternalURL: "https://*.collections.<span class="userinput">uuid_prefix</span>.your.domain"
+        ExternalURL: "https://collections.<span class="userinput">uuid_prefix</span>.your.domain"
+</code></pre>
+</notextile>
+
+h2. Run Keep-web
+
+h3. Start the service (option 1: systemd)
+
+If your system does not use systemd, skip this section and follow the "runit instructions":#runit instead.
+
+If your system uses systemd, the keep-web service should already be set up. Start it and check its status:
 
 <notextile>
-<pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+<pre><code>~$ <span class="userinput">sudo systemctl restart keep-web</span>
+~$ <span class="userinput">sudo systemctl status keep-web</span>
+&#x25cf; keep-web.service - Arvados Keep web gateway
+   Loaded: loaded (/lib/systemd/system/keep-web.service; enabled)
+   Active: active (running) since Sat 2019-08-10 10:33:21 UTC; 3 days ago
+     Docs: https://doc.arvados.org/
+ Main PID: 4242 (keep-web)
+   CGroup: /system.slice/keep-web.service
+           └─4242 /usr/bin/keep-web
+[...]
 </code></pre>
 </notextile>
+
+h3(#runit). Start the service (option 2: runit)
+
+Install runit to supervise the Keep-web daemon.  {% include 'install_runit' %}
+
+The basic command to start Keep-web in the service run script is:
+
+<notextile>
+<pre><code>exec keep-web
+</code></pre>
+</notextile>
+
index b23ec46fa75f35f1955aea204d3f01bd6407f365..50d5d89871a612b368c0c46e966ed5717faa7a6a 100644 (file)
@@ -339,98 +339,3 @@ optional arguments:
                         Default 3.
 </pre>
 </notextile>
-
-
-h3(#arv-pipeline-run). arv pipeline run
-
-WARNING: this uses the obsolete "job" API.  Don't use this.  You should use @arvados-cwl-runner@ instead.
-
-@arv pipeline run@ can be used to start a pipeline run from the command line.
-
-The User Guide has a page with a bit more information on "using arv pipeline run":{{site.baseurl}}/user/topics/running-pipeline-command-line.html.
-
-<notextile>
-<pre>
-$ <code class="userinput">arv pipeline run --help</code>
-Options:
-        --dry-run, -n:   Do not start any new jobs or wait for existing jobs to
-                         finish. Just find out whether jobs are finished,
-                         queued, or running for each component.
-    --status-text &lt;s&gt;:   Store plain text status in given file. (Default:
-                         /dev/stdout)
-    --status-json &lt;s&gt;:   Store json-formatted pipeline in given file. (Default:
-                         /dev/null)
-            --no-wait:   Do not wait for jobs to finish. Just look up status,
-                         submit new jobs if needed, and exit.
-           --no-reuse:   Do not reuse existing jobs to satisfy pipeline
-                         components. Submit a new job for every component.
-          --debug, -d:   Print extra debugging information on stderr.
-    --debug-level &lt;i&gt;:   Set debug verbosity level.
-       --template &lt;s&gt;:   UUID of pipeline template, or path to local pipeline
-                         template file.
-       --instance &lt;s&gt;:   UUID of pipeline instance.
-             --submit:   Submit the pipeline instance to the server, and exit.
-                         Let the Crunch dispatch service satisfy the components
-                         by finding/running jobs.
-  --run-pipeline-here:   Manage the pipeline instance in-process. Submit jobs
-                         to Crunch as needed. Do not exit until the pipeline
-                         finishes (or fails).
-      --run-jobs-here:   Run jobs in the local terminal session instead of
-                         submitting them to Crunch. Implies
-                         --run-pipeline-here. Note: this results in a
-                         significantly different job execution environment, and
-                         some Crunch features are not supported. It can be
-                         necessary to modify a pipeline in order to make it run
-                         this way.
-           --run-here:   Synonym for --run-jobs-here.
-    --description &lt;s&gt;:   Description for the pipeline instance.
-        --version, -v:   Print version and exit
-           --help, -h:   Show this message
-</pre>
-</notextile>
-
-h3(#arv-run). arv run
-
-WARNING: this uses the obsolete "job" API.  Don't use this.  You should use @arvados-cwl-runner@ instead.
-
-The @arv-run@ command creates Arvados pipelines at the command line that fan out to multiple concurrent tasks across Arvados compute nodes.
-
-The User Guide has a page on "using arv-run":{{site.baseurl}}/user/topics/arv-run.html.
-
-<notextile>
-<pre>
-$ <code class="userinput">arv run --help</code>
-usage: arv-run [-h] [--retries RETRIES] [--dry-run] [--local]
-               [--docker-image DOCKER_IMAGE] [--ignore-rcode] [--no-reuse]
-               [--no-wait] [--project-uuid PROJECT_UUID] [--git-dir GIT_DIR]
-               [--repository REPOSITORY] [--script-version SCRIPT_VERSION]
-               ...
-
-positional arguments:
-  args
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --retries RETRIES     Maximum number of times to retry server requests that
-                        encounter temporary failures (e.g., server down).
-                        Default 3.
-  --dry-run             Print out the pipeline that would be submitted and
-                        exit
-  --local               Run locally using arv-run-pipeline-instance
-  --docker-image DOCKER_IMAGE
-                        Docker image to use, otherwise use instance default.
-  --ignore-rcode        Commands that return non-zero return codes should not
-                        be considered failed.
-  --no-reuse            Do not reuse past jobs.
-  --no-wait             Do not wait and display logs after submitting command,
-                        just exit.
-  --project-uuid PROJECT_UUID
-                        Parent project of the pipeline
-  --git-dir GIT_DIR     Git repository passed to arv-crunch-job when using
-                        --local
-  --repository REPOSITORY
-                        repository field of component, default 'arvados'
-  --script-version SCRIPT_VERSION
-                        script_version field of component, default 'master'
-</pre>
-</notextile>
diff --git a/doc/sdk/python/crunch-utility-libraries.html.textile.liquid b/doc/sdk/python/crunch-utility-libraries.html.textile.liquid
deleted file mode 100644 (file)
index 3e26315..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
----
-layout: default
-navsection: sdk
-navmenu: Python
-title: "Crunch utility libraries"
-
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-Several utility libraries are included with Arvados. They are intended to make it quicker and easier to write your own crunch scripts.
-
-* "Python SDK extras":#pythonsdk
-* "Toolkit wrappers":#toolkit_wrappers
-
-h2(#pythonsdk). Python SDK extras
-
-The Python SDK adds some convenience features that are particularly useful in crunch scripts, in addition to the standard set of API calls.
-
-In a crunch job, the environment variables @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ will be set up so the job has the privileges of the user who submitted the job.
-
-<pre>
-import arvados
-
-my_user = arvados.api().users().current().execute()
-my_uuid = my_user['uuid']
-</pre>
-
-h3. Get the current job and task parameters
-
-@arvados.current_job()@ and @arvados.current_task()@ are convenient ways to retrieve the current Job and Task, using the @JOB_UUID@ and @TASK_UUID@ environment variables provided to each crunch task process.
-
-<pre>
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_job_input = this_job['script_parameters']['input']
-this_task_input = this_task['parameters']['input']
-</pre>
-
-h3(#one_task_per_input). Queue a task for each input file
-
-A common pattern for a crunch job is to run one task to scan the input, and one task per input file to do the work.
-
-The @one_task_per_input_file()@ function implements this pattern. Pseudocode:
-
-<pre>
-if this is the job's first (default) task:
-    for each file in the 'input' collection:
-        queue a new task, with parameters['input'] = file
-    exit
-else:
-    return
-</pre>
-
-Usage:
-
-<pre>
-import arvados
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-# Now do the work on a single file
-my_input = this_task['parameters']['input']
-</pre>
-
-h3. Set the current task's output and success flag
-
-Each task in a crunch job must make an API call to record its output and set its @success@ attribute to True. The object returned by @current_task()@ has a @set_output()@ method to make the process more succinct.
-
-<pre>
-arvados.current_task().set_output(my_output_locator)
-</pre>
-
-h3. arvados_ipc.py
-
-Manage child processes and FIFOs (pipes).
-
-
-This module makes it easier to check the exit status of every child process you start, and close the unused end of each FIFO at the appropriate time.
-
-<pre>
-from arvados_ipc import *
-
-children = {}
-pipes = {}
-
-pipe_setup(pipes, 'hellopipe')
-if 0 == named_fork(children, 'child_a'):
-    pipe_closeallbut(pipes, ('hellopipe', 'w'))
-    os.write(pipes['hellopipe', 'w'], "Hello, parent.")
-    os._exit(0)
-
-pipe_closeallbut(pipes, ('hellopipe', 'r'))
-with os.fdopen(pipes['hellopipe', 'r'], 'rb') as f:
-    message = f.read()
-    sys.stderr.write("Child says: " + message + "\n")
-
-if not waitpid_and_check_children(children):
-    raise Exception("Child process exited non-zero.")
-</pre>
-
-The "crunch scripts" included with Arvados include some more examples of using the arvados_ipc module.
-
-h2(#toolkit_wrappers). Toolkit wrappers
-
-The following *arvados-&lowast;.py* modules provide "extract, build, run" helpers to make it easy to incorporate common analysis tools in your crunch scripts.
-
-h3. arvados_bwa.py
-
-Build and run the "bwa":http://bio-bwa.sourceforge.net/bwa.shtml program.
-
-The module retrieves the bwa source code from Keep, using the job's @bwa_tbz@ parameter.
-
-<pre>
-import arvados_bwa
-arvados_bwa.run('aln', [ref_basename, '-'],
-                stdin=open(fastq_filename,'rb'),
-                stdout=open(aln_filename,'wb'))
-</pre>
-
-On qr1hi.arvadosapi.com, the source distribution @bwa-0.7.5a.tar.bz2@ is available in the collection @8b6e2c4916133e1d859c9e812861ce13+70@.
-
-<pre>
-{
- "script_parameters":{
-  "bwa_tbz":"8b6e2c4916133e1d859c9e812861ce13+70",
-  ...
- },
- ...
-}
-</pre>
-
-h3. arvados_gatk2.py
-
-Extract and run the "Genome Analysis Toolkit":http://www.broadinstitute.org/gatk/ programs.
-
-The module retrieves the binary distribution tarball from Keep, using the job's @gatk_tbz@ parameter.
-
-<pre>
-arvados_gatk2.run(
-    args=[
-        '-nct', 8,
-        '-T', 'BaseRecalibrator',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', recal_file,
-        ])
-</pre>
-
-On qr1hi.arvadosapi.com, the binary distribution @GenomeAnalysisTK-2.6-4.tar.bz2@ is available in the collection @5790482512cf6d5d6dfd50b7fd61e1d1+86@.
-
-The GATK data bundle is available in the collection @d237a90bae3870b3b033aea1e99de4a9+10820@.
-
-<pre>
-{
- "script_parameters":{
-  "gatk_tbz":"7e0a277d6d2353678a11f56bab3b13f2+87",
-  "gatk_bundle":"d237a90bae3870b3b033aea1e99de4a9+10820",
-  ...
- },
- ...
-}
-</pre>
-
-h3. arvados_samtools.py
-
-Build and run the "samtools":http://samtools.sourceforge.net/samtools.shtml program.
-
-
-The module retrieves the samtools source code from Keep, using the job's @samtools_tgz@ parameter.
-
-<pre>
-import arvados_samtools
-arvados_samtools.run('view', ['-S', '-b', '-'],
-                     stdin=open(sam_filename,'rb'),
-                     stdout=open(bam_filename,'wb'))
-</pre>
-
-On qr1hi.arvadosapi.com, the source distribution @samtools-0.1.19.tar.gz@ is available in the collection @c777e23cf13e5d5906abfdc08d84bfdb+74@.
-
-<pre>
-{
- "script_parameters":{
-  "samtools_tgz":"c777e23cf13e5d5906abfdc08d84bfdb+74",
-  ...
- },
- ...
-}
-</pre>
-
-
-h3. arvados_picard.py
-
-Build and run the "picard":http://picard.sourceforge.net/command-line-overview.shtml program.
-
-
-The module retrieves the picard binary distribution from Keep, using the job's @picard_zip@ parameter.
-
-<pre>
-import arvados_picard
-arvados_picard.run(
-    'FixMateInformation',
-    params={
-        'i': input_bam_path,
-        'o': '/dev/stdout',
-        'quiet': 'true',
-        'so': 'coordinate',
-        'validation_stringency': 'LENIENT',
-        'compression_level': 0
-        },
-    stdout=open('out.bam','wb'))
-</pre>
-
-On qr1hi.arvadosapi.com, the binary distribution @picard-tools-1.82.zip@ is available in the collection @687f74675c6a0e925dec619cc2bec25f+77@.
-
-<pre>
-{
- "script_parameters":{
-  "picard_zip":"687f74675c6a0e925dec619cc2bec25f+77",
-  ...
- },
- ...
-}
-</pre>
index 27970f440a74ed1e89342b394d28929858c300c0..725528f44d14e01e663c81fc317eaba1bde3886d 100644 (file)
@@ -32,25 +32,25 @@ table(table table-bordered table-condensed).
 |==--debug==|               Print even more logging|
 |==--metrics==|             Print timing metrics|
 |==--tool-help==|           Print command line help for tool|
-|==--enable-reuse==|        Enable job or container reuse (default)|
-|==--disable-reuse==|       Disable job or container reuse|
-|==--project-uuid UUID==|   Project that will own the workflow jobs, if not provided, will go to home project.|
+|==--enable-reuse==|        Enable container reuse (default)|
+|==--disable-reuse==|       Disable container reuse|
+|==--project-uuid UUID==|   Project that will own the workflow containers. If not provided, the home project is used.|
 |==--output-name OUTPUT_NAME==|Name to use for collection that stores the final output.|
 |==--output-tags OUTPUT_TAGS==|Tags for the final output collection separated by commas, e.g., =='--output-tags tag0,tag1,tag2'==.|
-|==--ignore-docker-for-reuse==|Ignore Docker image version when deciding whether to reuse past jobs.|
+|==--ignore-docker-for-reuse==|Ignore Docker image version when deciding whether to reuse past containers.|
 |==--submit==|              Submit workflow runner to Arvados to manage the workflow (default).|
-|==--local==|               Run workflow on local host (still submits jobs to Arvados).|
+|==--local==|               Run workflow on local host (still submits containers to Arvados).|
 |==--create-template==|     (Deprecated) synonym for --create-workflow.|
-|==--create-workflow==|     Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.|
+|==--create-workflow==|     Register an Arvados workflow that can be run from Workbench|
 |==--update-workflow== UUID|Update an existing Arvados workflow or pipeline template with the given UUID.|
-|==--wait==|                After submitting workflow runner job, wait for completion.|
-|==--no-wait==|             Submit workflow runner job and exit.|
+|==--wait==|                After submitting workflow runner, wait for completion.|
+|==--no-wait==|             Submit workflow runner and exit.|
 |==--log-timestamps==|      Prefix logging lines with timestamp|
 |==--no-log-timestamps==|   No timestamp on logging lines|
-|==--api== {jobs,containers}|Select work submission API. Default is 'jobs' if that API is available, otherwise 'containers'.|
+|==--api== {containers}|Select work submission API.  Only supports 'containers'|
 |==--compute-checksum==|    Compute checksum of contents while collecting outputs|
-|==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner job (default 1024)|
-|==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner job|
+|==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
+|==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
 |==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
 |==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one (containers API only).|
 |==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster (containers API only)|
@@ -60,7 +60,7 @@ table(table table-bordered table-condensed).
 |==--storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving workflow output to Keep.|
 |==--intermediate-output-ttl== N|If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).|
 |==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower, containers api only)|
-|==--thread-count== THREAD_COUNT|Number of threads to use for job submit and output collection.|
+|==--thread-count== THREAD_COUNT|Number of threads to use for container submit and output collection.|
 |==--http-timeout== HTTP_TIMEOUT|API request timeout in seconds. Default is 300 seconds (5 minutes).|
 |==--trash-intermediate==|Immediately trash intermediate outputs on workflow success.|
 |==--no-trash-intermediate==|Do not trash intermediate outputs (default).|
@@ -106,7 +106,7 @@ qr1hi-8i9sb-fm2n3b1w0l6bskg
 
 h3(#local). Control a workflow locally
 
-To run a workflow with local control, use @--local@.  This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs, however, the jobs themselves will still run on the Arvados cluster.  With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
+To run a workflow with local control, use @--local@.  This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting containers; however, the containers themselves will still run on the Arvados cluster.  With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
 
 <notextile>
 <pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
index 93b2e59072a0f93d68d4a1bfe080eb184bcd7a7a..7bb9fdcbe602c5009242798295b043cfb2508204 100644 (file)
@@ -18,17 +18,3 @@ If a step requires network access, use "NetworkAccess":https://www.commonwl.org/
 To prevent misbehaving steps from running forever and wasting resources, you can fail the step if it exceeds a certain running time with "ToolTimeLimit":https://www.commonwl.org/v1.1/CommandLineTool.html#ToolTimeLimit instead of the deprecated @cwltool:TimeLimit@.
 
 To control if an individual step can be reused, use "WorkReuse":https://www.commonwl.org/v1.1/CommandLineTool.html#WorkReuse instead of the deprecated @arv:ReuseRequirement@.
-
-h2(#migrate). Differences in running CWL on the legacy jobs API vs containers API
-
-Most users can ignore this section.
-
-When migrating your Arvados cluster from using the jobs API (--api=jobs) (sometimes referred to as "crunch v1") to the containers API (--api=containers) ("crunch v2") there are a few differences in behavior:
-
-A tool may fail to find an input file that could be found when run under the jobs API.  This is because tools are limited to accessing collections explicitly listed in the input, and further limited to those individual files or subdirectories that are listed.  For example, given an explicit file input @/dir/subdir/file1.txt@, a tool will not be allowed to implicitly access a file in the parent directory @/dir/file2.txt@.  Use @secondaryFiles@ or a @Directory@ for files that need to be grouped together.
-
-A tool may fail when attempting to rename or delete a file in the output directory.  This may happen because files listed in @InitialWorkDirRequirement@ appear in the output directory as normal files (not symlinks) but cannot be moved, renamed or deleted unless marked as "writable" in CWL.  These files will be added to the output collection but without any additional copies of the underlying data.
-
-A tool may fail when attempting to access the network.  This may happen because, unlike the jobs API, under the containers API network access is disabled by default.  Tools which require network access should add "arv:APIRequirement: {}":cwl-extensions.html#APIRequirement to the @requirements@ section.
-
-CWL v1.1 is not supported by the Jobs API.
diff --git a/doc/user/examples/crunch-examples.html.textile.liquid b/doc/user/examples/crunch-examples.html.textile.liquid
deleted file mode 100644 (file)
index c93766a..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Scripts provided by Arvados"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-Several crunch scripts are included with Arvados in the "/crunch_scripts directory":https://dev.arvados.org/projects/arvados/repository/revisions/master/show/crunch_scripts. They are intended to provide examples and starting points for writing your own scripts.
-
-h4. bwa-aln
-
-Run the bwa aligner on a set of paired-end fastq files, producing a BAM file for each pair. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/bwa-aln
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|bwa_tbz|Collection with the bwa source distribution.|@8b6e2c4916133e1d859c9e812861ce13+70@|
-|samtools_tgz|Collection with the samtools source distribution.|@c777e23cf13e5d5906abfdc08d84bfdb+74@|
-|input|Collection with fastq reads (pairs of *_1.fastq.gz and *_2.fastq.gz).|@d0136bc494c21f79fc1b6a390561e6cb+2778@|
-</div>
-
-h4. bwa-index
-
-Generate an index of a fasta reference genome suitable for use by bwa-aln. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/bwa-index
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|bwa_tbz|Collection with the bwa source distribution.|@8b6e2c4916133e1d859c9e812861ce13+70@|
-|input|Collection with reference data (*.fasta.gz, *.fasta.fai.gz, *.dict.gz).|@c361dbf46ee3397b0958802b346e9b5a+925@|
-</div>
-
-h4. picard-gatk2-prep
-
-Using the FixMateInformation, SortSam, ReorderSam, AddOrReplaceReadGroups, and BuildBamIndex modules from picard, prepare a BAM file for use with the GATK2 tools. Additionally, run picard's CollectAlignmentSummaryMetrics module to produce a @*.casm.tsv@ statistics file for each BAM file. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/picard-gatk2-prep
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|input|Collection containing aligned bam files.||
-|picard_zip|Collection with the picard binary distribution.|@687f74675c6a0e925dec619cc2bec25f+77@|
-|reference|Collection with reference data (*.fasta.gz, *.fasta.fai.gz, *.dict.gz).|@c361dbf46ee3397b0958802b346e9b5a+925@|
-</div>
-
-h4. GATK2-realign
-
-Run GATK's RealignerTargetCreator and IndelRealigner modules on a set of BAM files. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/GATK2-realign
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|input|Collection containing aligned bam files.||
-|picard_zip|Collection with the picard binary distribution.|@687f74675c6a0e925dec619cc2bec25f+77@|
-|gatk_tbz|Collection with the GATK2 binary distribution.|@7e0a277d6d2353678a11f56bab3b13f2+87@|
-|gatk_bundle|Collection with the GATK data bundle.|@d237a90bae3870b3b033aea1e99de4a9+10820@|
-|known_sites|List of files in the data bundle to use as GATK @-known@ arguments. Optional. |@["dbsnp_137.b37.vcf","Mills_and_1000G_gold_standard.indels.b37.vcf"]@ (this is the default value)|
-|regions|Collection with .bed files indicating sequencing target regions. Optional.||
-|region_padding|Corresponds to GATK @--interval_padding@ argument. Required if a regions parameter is given.|10|
-</div>
-
-h4. GATK2-bqsr
-
-Run GATK's BaseQualityScoreRecalibration module on a set of BAM files. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/GATK2-bqsr
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|input|Collection containing bam files.||
-|gatk_tbz|Collection with the GATK2 binary distribution.|@7e0a277d6d2353678a11f56bab3b13f2+87@|
-|gatk_bundle|Collection with the GATK data bundle.|@d237a90bae3870b3b033aea1e99de4a9+10820@|
-</div>
-
-h4. GATK2-merge-call
-
-Merge a set of BAM files using picard, and run GATK's UnifiedGenotyper module on the merged set to produce a VCF file. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/GATK2-merge-call
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|input|Collection containing bam files.||
-|picard_zip|Collection with the picard binary distribution.|@687f74675c6a0e925dec619cc2bec25f+77@|
-|gatk_tbz|Collection with the GATK2 binary distribution.|@7e0a277d6d2353678a11f56bab3b13f2+87@|
-|gatk_bundle|Collection with the GATK data bundle.|@d237a90bae3870b3b033aea1e99de4a9+10820@|
-|regions|Collection with .bed files indicating sequencing target regions. Optional.||
-|region_padding|Corresponds to GATK @--interval_padding@ argument. Required if a regions parameter is given.|10|
-</div>
-
-h4. file-select
-
-Pass through the named files from input to output collection, and ignore the rest. "View source.":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/file-select
-
-<div class="offset1">
-table(table table-bordered table-condensed).
-|_Parameter_|_Description_|_Example_|
-|names|List of filenames to include in the output.|@["human_g1k_v37.fasta.gz","human_g1k_v37.fasta.fai.gz"]@|
-</div>
diff --git a/doc/user/reference/job-pipeline-ref.html.textile.liquid b/doc/user/reference/job-pipeline-ref.html.textile.liquid
deleted file mode 100644 (file)
index f80cec9..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Pipeline template reference"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-Pipeline template options are described on the "pipeline template schema page.":{{site.baseurl}}/api/methods/pipeline_templates.html
diff --git a/doc/user/topics/arv-run.html.textile.liquid b/doc/user/topics/arv-run.html.textile.liquid
deleted file mode 100644 (file)
index 9752ca7..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Using arv-run"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'crunch1only_begin' %}
-On those sites, the features described here are not yet implemented.
-{% include 'crunch1only_end' %}
-
-The @arv-run@ command enables you to create Arvados pipelines at the command line that fan out to multiple concurrent tasks across Arvados compute nodes.
-
-{% include 'tutorial_expectations' %}
-
-h1. Usage
-
-Using @arv-run@ you can write and test command lines interactively, then insert @arv-run@ at the beginning of the command line to run the command on Arvados.  For example:
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
-$ <span class="userinput">ls *.fastq</span>
-HWI-ST1027_129_D0THKACXX.1_1.fastq  HWI-ST1027_129_D0THKACXX.1_2.fastq
-$ <span class="userinput">grep -H -n ATTGGAGGAAAGATGAGTGAC HWI-ST1027_129_D0THKACXX.1_1.fastq</span>
-HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
-HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
-HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
-$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC HWI-ST1027_129_D0THKACXX.1_1.fastq</span>
-Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241
-[...]
- 0 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
- 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
- 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
- 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
- 0 stderr run-command: completed with exit code 0 (success)
-[...]
-</pre>
-</notextile>
-
-A key feature of @arv-run@ is the ability to introspect the command line to determine which arguments are file inputs, and transform those paths so they are usable inside the Arvados container.  In the above example, @HWI-ST1027_129_D0THKACXX.1_1.fastq@ is transformed into @/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq@.  @arv-run@ also works together with @arv-mount@ to identify that the file is already part of an Arvados collection.  In this case, it will use the existing collection without any upload step.  If you specify a file that is only available on the local filesystem, @arv-run@ will upload a new collection.
-
-If you find that @arv-run@ is incorrectly rewriting one of your command line arguments, place a backslash @\@ at the beginning of the affected argument to quote it (suppress rewriting).
-
-h2. Parallel tasks
-
-@arv-run@ will parallelize over files listed on the command line after @--@.
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
-$ <span class="userinput">ls *.fastq</span>
-HWI-ST1027_129_D0THKACXX.1_1.fastq  HWI-ST1027_129_D0THKACXX.1_2.fastq
-$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC -- *.fastq</span>
-Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241
-[...]
- 0 stderr run-command: parallelizing on input0 with items [u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq', u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq']
-[...]
- 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
- 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq
-[...]
- 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
- 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
- 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
- 1 stderr run-command: completed with exit code 0 (success)
- 2 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq:34:CTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAG
- 2 stderr run-command: completed with exit code 0 (success)
-</pre>
-</notextile>
-
-You may specify @--batch-size N@ (or the short form @-bN@) after the @--@ but before listing any files to specify how many files to put on the command line for each task.  See "Putting it all together" below for an example.
-
-h2. Redirection
-
-You may use standard input (@<@) and standard output (@>@) redirection.  This will create a separate task for each file listed as standard input.  You are only permitted to supply a single file name for stdout @>@ redirection.  If there are multiple tasks with their output sent to the same file, the output will be collated at the end of the pipeline.
-
-(Note: because the syntax is designed to mimic standard shell syntax, it is necessary to quote the metacharacters @<@, @>@ and @|@ as either @\<@, @\>@ and @\|@ or @'<'@, @'>'@ and @'|'@.)
-
-{% include 'arv_run_redirection' %}
-
-You may use "run-command":run-command.html parameter substitution in the output file name to generate different filenames for each task:
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
-$ <span class="userinput">ls *.fastq</span>
-$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> '$(task.uuid).txt'</span>
-[...]
- 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > qr1hi-ot0gb-hmmxf2zubfpmhfk.txt
- 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq > qr1hi-ot0gb-iu2xgy4hkx4mmri.txt
- 1 stderr run-command: completed with exit code 0 (success)
- 1 stderr run-command: the following output files will be saved to keep:
- 1 stderr run-command:          363 ./qr1hi-ot0gb-hmmxf2zubfpmhfk.txt
- 1 stderr run-command: start writing output to keep
- 1 stderr upload wrote 363 total 363
- 2 stderr run-command: completed with exit code 0 (success)
- 2 stderr run-command: the following output files will be saved to keep:
- 2 stderr run-command:          121 ./qr1hi-ot0gb-iu2xgy4hkx4mmri.txt
- 2 stderr run-command: start writing output to keep
- 2 stderr upload wrote 121 total 121
-[...]
-</pre>
-</notextile>
-
-h2. Pipes
-
-Multiple commands may be connected by pipes and execute in the same container:
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
-$ <span class="userinput">ls *.fastq</span>
-$ <span class="userinput">arv-run cat -- *.fastq \| grep -H -n ATTGGAGGAAAGATGAGTGAC \> output.txt</span>
-[...]
- 1 stderr run-command: cat /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq | grep -H -n ATTGGAGGAAAGATGAGTGAC > output.txt
- 2 stderr run-command: cat /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq | grep -H -n ATTGGAGGAAAGATGAGTGAC > output.txt
-[...]
-</pre>
-</notextile>
-
-If you need to capture intermediate results of a pipe, use the @tee@ command.
-
-h2. Running a shell script
-
-<notextile>
-<pre>
-$ <span class="userinput">echo 'echo hello world' > hello.sh</span>
-$ <span class="userinput">arv-run /bin/sh hello.sh</span>
-Upload local files: "hello.sh"
-Uploaded to qr1hi-4zz18-23u3hxugbm71qmn
-Running pipeline qr1hi-d1hrv-slcnhq5czo764b1
-[...]
- 0 stderr run-command: /bin/sh /keep/5d3a4131b7d8f233f2a917d8a5c3c2b2+52/hello.sh
- 0 stderr hello world
- 0 stderr run-command: completed with exit code 0 (success)
-[...]
-</pre>
-</notextile>
-
-h2. Additional options
-
-* @--docker-image IMG@ : By default, commands run in a container created from the @default_docker_image_for_jobs@ setting on the API server.  Use this option to specify a different image to use.  Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
-* @--dry-run@ : Print out the final Arvados pipeline generated by @arv-run@ without submitting it.
-* @--local@ : By default, the pipeline will be submitted to your configured Arvados instance.  Use this option to run the command locally using @arv-run-pipeline-instance --run-jobs-here@.
-* @--ignore-rcode@ : Some commands use non-zero exit codes to indicate nonfatal conditions (e.g., @grep@ returns 1 when no match is found).  Set this to indicate that commands that return non-zero return codes should not be considered failed.
-* @--no-wait@ : Do not wait and display logs after submitting command, just exit.
-
-h2. Putting it all together: bwa mem
-
-<notextile>
-<pre>
-$ <span class="userinput">cd ~/keep/by_id/d0136bc494c21f79fc1b6a390561e6cb+2778</span>
-$ <span class="userinput">arv-run --docker-image arvados/jobs-java-bwa-samtools bwa mem ../3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta -- --batch-size 2 *.fastq.gz \> '$(task.uuid).sam'</span>
- 0 stderr run-command: parallelizing on input0 with items [[u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_1.fastq.gz', u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_2.fastq.gz'], [u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_1.fastq.gz', u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_2.fastq.gz']]
-[...]
- 1 stderr run-command: bwa mem /keep/3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_1.fastq.gz /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_2.fastq.gz > qr1hi-ot0gb-a4bzzyqqz4ubair.sam
- 2 stderr run-command: bwa mem /keep/3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_1.fastq.gz /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_2.fastq.gz > qr1hi-ot0gb-14j9ncw0ymkxq0v.sam
-</pre>
-</notextile>
diff --git a/doc/user/topics/crunch-tools-overview.html.textile.liquid b/doc/user/topics/crunch-tools-overview.html.textile.liquid
deleted file mode 100644 (file)
index c4d01cf..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Tools for writing Crunch pipelines"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-Arvados includes a number of tools to help you develop pipelines and jobs for Crunch.  This overview explains each tool's intended use to help you choose the right one.
-
-h2. Use the "arv-run command-line utility":arv-run.html
-
-arv-run is an interactive command-line tool.  You run it as the first command of a traditional Unix shell command line, and it converts that work into an Arvados pipeline.  It automatically uploads any required data to Arvados, and dispatches work in parallel when possible.  This lets you easily migrate analysis work that you're doing on the command line to Arvados compute nodes.
-
-arv-run is best suited to complement work you already do on the command line.  If you write a shell one-liner that generates useful data, you can then call it with arv-run to parallelize it across a larger data set and save the results in Arvados.  For example, this run searches multiple FASTQ files in parallel, and saves the results to Keep through shell redirection:
-
-{% include 'arv_run_redirection' %}
-
-arv-run does not generate pipeline templates, or implement higher-level shell constructs like flow control.  If you want to make it easy to rerun your pipeline with different data later, or adapt to different inputs, it's best to write your own template.
-
-Refer to the "arv-run documentation":arv-run.html for details.
-
-h2. Write a "pipeline template":{{site.baseurl}}/user/tutorials/running-external-program.html
-
-Pipeline templates describe a set of analysis programs that should be run, and the inputs they require.  You can provide a high-level description of how data flows through the pipeline—for example, the outputs of programs A and B are provided as input to program C—and let Crunch take care of the details of starting the individual programs at the right time with the inputs you specified.
-
-Pipeline templates are written in JSON.  Once you save a pipeline template in Arvados, you run it by creating a pipeline instance that lists the specific inputs you'd like to use.  Arvados Workbench and the @arv pipeline run@ command-line tool both provide high-level interfaces to do this easily.  The pipeline's final output(s) will be saved in a project you specify.
-
-See the User Guide topic to learn how to "write and run your own pipelines":{{site.baseurl}}/user/tutorials/running-external-program.html.  The rest of this page suggests specific tools to use in your templates.
-
-h3. The "run-command Crunch script":run-command.html
-
-run-command is a Crunch script that is included with Arvados.  It builds a command line from its input parameters.  It runs that command on files in Collections using the Keep mount provided by Crunch.  Output files created by the command are saved in a new collection, which is considered the program's final output.  It can run the command in parallel on a list of inputs, and introspect arguments so you can, for example, generate output filenames based on input filenames.
-
-run-command is a great way to use an existing analysis tool inside an Arvados pipeline.  You might use one or two tools in a larger pipeline, or convert a simple series of tool invocations into a pipeline to benefit from Arvados' provenance tracking and job reuse.  For example, here's a one-step pipeline that uses run-command with bwa to align a single paired-end read FASTQ sample:
-
-<notextile>{% code 'run_command_simple_example' as javascript %}</notextile>
-
-run-command is limited to manipulating the tool's command-line arguments, and can only parallelize on simple lists of inputs.  If you need to preprocess input, or dispatch work differently based on those inputs, consider writing your own Crunch script.
-
-Refer to the "run-command reference":run-command.html for details.
-
-h3. Writing "your own Crunch script":{{site.baseurl}}/user/tutorials/tutorial-firstscript.html with the Python SDK
-
-Arvados includes a Python SDK designed to help you write your own Crunch scripts.  It provides a native Arvados API client; Collection classes that provide file-like objects to interact with data in Keep; and utility functions to work within Crunch's execution environment.  Using the Python SDK, you can efficiently dispatch work with however much sophistication you require.
-
-Writing your own Crunch script is the best way to do analysis in Arvados when an existing tool does not meet your needs.  By interacting directly with Arvados objects, you'll have full power to introspect and adapt to your input, introduce minimal overhead, and get very direct error messages in case there's any trouble.  As a simple example, here's a Crunch script that checksums each file in a collection in parallel, saving the results in Keep:
-
-<notextile>{% code 'tutorial_hash_script_py' as python %}</notextile>
-
-There's no limit to what you can do with your own Crunch script.  The downside is the amount of time and effort you're required to invest to write and debug new code.  If you have to do that anyway, writing a Crunch script will give you the most benefit from using Arvados.
-
-Refer to the "User Guide topic on writing Crunch scripts":{{site.baseurl}}/user/tutorials/tutorial-firstscript.html and the "Python SDK reference":{{site.baseurl}}/sdk/python/python.html for details.
-
-h3. Combining run-command and custom Crunch scripts in a pipeline
-
-Just because you need to write some new code to do some work doesn't mean that you have to do all the work in your own Crunch script.  You can combine your custom steps with existing tools in a pipeline, passing data between them.  For example, maybe there's a third-party tool that does most of the analysis work you need, but you often need to massage the tool's data.  You could write your own preprocessing script that creates a new collection to use as the input of a run-command job, or a postprocessing script to create a final output after the tool is done, and tie them all together in a pipeline.  Just like Unix pipes, Arvados pipelines let you combine smaller tools to maximize utility.
-
-h3. Using run-command with your legacy scripts
-
-Perhaps you've already written your own analysis program that you want to run inside Arvados.  Currently, the easiest way to do that is to copy run-command from the Arvados source code to your own Arvados git repository, along with your internal tool.  Then your pipeline can call run-command from your own repository to execute the internal tool alongside it.
-
-This approach has the downside that you'll have to copy and push run-command again any time there's an update you'd like to use.  Future Arvados development will make it possible to get code from multiple git repositories, so your job can use the latest run-command in the Arvados source, as well as the latest tool in your own git repository.  Follow "Arvados issue #4561":https://arvados.org/issues/4561 for updates.
-
-Alternatively, you can "build a Docker image that includes your program, add it to Arvados":arv-docker.html, then run the Arvados run-command script inside that Docker image.
diff --git a/doc/user/topics/run-command.html.textile.liquid b/doc/user/topics/run-command.html.textile.liquid
deleted file mode 100644 (file)
index 6996475..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "run-command reference"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-The @run-command@ crunch script enables you to run command line programs.
-
-{% include 'tutorial_expectations_workstation' %}
-
-h1. Using run-command
-
-The basic @run-command@ process evaluates its inputs and builds a command line, executes the command, and saves the contents of the output directory back to Keep.  For large datasets, @run-command@ can schedule concurrent tasks to execute the wrapped program over a range of inputs (see @task.foreach@ below.)
-
-@run-command@ is controlled through the @script_parameters@ section of a pipeline component.  @script_parameters@ is a JSON object consisting of key-value pairs.  There are three categories of keys that are meaningful to run-command:
-* The @command@ section, defining the template used to build the command line for each task
-* Special processing directives such as @task.foreach@, @task.cwd@, @task.vwd@, @task.stdin@ and @task.stdout@
-* User-defined parameters (everything else)
-
-In the following examples, you can use "dry run mode" to determine the command line that @run-command@ will use without actually running the command.  For example:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME/arvados/crunch_scripts</span>
-~$ <span class="userinput">./run-command --dry-run --script-parameters '{
-  "command": ["echo", "hello world"]
-}'</span>
-run-command: echo hello world
-</code></pre>
-</notextile>
-
-h2. Command template
-
-The value of the "command" key is a list.  The first parameter of the list is the actual program to invoke, followed by the command arguments.  The simplest @run-command@ invocation simply runs a program with static parameters.  In this example, run "echo" with the first argument "hello world":
-
-<pre>
-{
-  "command": ["echo", "hello world"]
-}
-</pre>
-
-Running this job will print "hello world" to the job log.
-
-By default, the command will start with the current working directory set to the output directory.  Anything written to the output directory will be saved to Keep when the command is finished.  You can change the default working directory using @task.cwd@ and get the path to the output directory using @$(task.outdir)@ as explained below.
-
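-For instance, this minimal sketch (not one of the original examples) writes a file straight into the output directory via the @$(task.outdir)@ substitution described below, so it ends up in the output collection:
-
-<pre>
-{
-  "command": ["/bin/sh", "-c", "echo hello world > $(task.outdir)/greeting.txt"]
-}
-</pre>
-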
-Items in the "command" list may include lists and objects in addition to strings.  Lists are flattened to produce the final command line.  JSON objects are evaluated as list item functions (see below).  For example, the following evaluates to @["echo", "hello", "world"]@:
-
-<pre>
-{
-  "command": ["echo", ["hello", "world"]]
-}
-</pre>
-
-Finally, if "command" is a list of lists, it specifies a Unix pipeline where the standard output of the previous command is piped into the standard input of the next command.  The following example describes the Unix pipeline @cat foo | grep bar@:
-
-<pre>
-{
-  "command": [["cat", "foo"], ["grep", "bar"]]
-}
-</pre>
-
-h2. Parameter substitution
-
-The "command" list can include parameter substitutions.  Substitutions are enclosed in "$(...)" and may contain the name of a user-defined parameter.  In the following example, the value of "a" is "hello world"; so when "command" is evaluated, it will substitute "hello world" for "$(a)":
-
-<pre>
-{
-  "a": "c1bad4b39ca5a924e481008009d94e32+210/var-GS000016015-ASM.tsv.bz2",
-  "command": ["echo", "$(file $(a))"]
-}
-</pre>
-
-table(table table-bordered table-condensed).
-|_. Function|_. Action|
-|$(file ...)       | Takes a reference to a file within an Arvados collection and evaluates to a file path on the local file system where that file can be accessed by your command.  Will raise an error if the file is not accessible.|
-|$(dir ...)        | Takes a reference to an Arvados collection or directory within an Arvados collection and evaluates to a directory path on the local file system where that directory can be accessed by your command.  The path may include a file name, in which case it will evaluate to the parent directory of the file.  Uses Python's os.path.dirname(), so "/foo/bar" will evaluate to "/foo" but "/foo/bar/" will evaluate to "/foo/bar".  Will raise an error if the directory is not accessible. |
-|$(basename&nbsp;...)   | Strip leading directory and trailing file extension from the path provided.  For example, $(basename /foo/bar.baz.txt) will evaluate to "bar.baz".|
-|$(glob ...)       | Take a Unix shell path pattern (supports @*@ @?@ and @[]@) and search the local filesystem, returning the first match found.  Use together with $(dir ...) to get a local filesystem path for Arvados collections.  For example: $(glob $(dir $(mycollection)/*.bam)) will find the first .bam file in the collection specified by the user parameter "mycollection".  If there is more than one match, which one is returned is undefined.  Will raise an error if no matches are found.|
-|$(task.tmpdir)|Designated temporary directory.  This directory will be discarded when the job completes.|
-|$(task.outdir)|Designated output directory.  The contents of this directory will be saved to Keep when the job completes.  A symlink to a file in the keep mount will reference existing Keep blocks in your job output collection, with no data copying or duplication.|
-|$(job.srcdir)|Path to the git working directory ($CRUNCH_SRC).|
-|$(node.cores)|Number of CPU cores on the node.|
-|$(job.uuid)|Current job uuid ($JOB_UUID)|
-|$(task.uuid)|Current task uuid ($TASK_UUID)|
-
-h3. Escape sequences
-
-If your command includes a @$()@ sequence that shouldn't be interpreted by run-command&mdash;for example, because you're writing shell code that calls a subcommand&mdash;you can prevent run-command from interpreting it by placing a backslash in front of the @$@ character.  Note that JSON also uses backslash to escape characters, so you'll need to write two backslashes for run-command to see one after parsing the parameter.  This example uppercases all alphabetic characters in the "pattern" parameter before using it as a regular expression in grep:
-
-<pre>{"command": ["bash", "-c", "grep \\$(echo '$(pattern)' | tr a-z A-Z) '$(input)'"]}</pre>
-
-You can put a literal backslash in your command by escaping it with another backslash.  Ultimately this means that where the primary Unix command includes a single backslash, you'll need to write four backslashes: double the backslashes for run-command escaping, then double them again for JSON escaping.
-
-<pre>{"command": ["grep", "\\\\bword\\\\b", "$(input)"]}</pre>
-
-h2. List context
-
-Where specified by the documentation, parameters may be evaluated in a "list context".  That means the value will evaluate to a list instead of a string.  Parameter values can be a static list, a path to a file, a path to a directory, or a JSON object describing a list context function.
-
-If the value is a string, it is interpreted as a path.  If the path specifies a regular file, that file will be opened as a text file, producing a list with one item for each line in the file (end-of-line characters will be stripped).  If the path specifies a directory, the result is a list containing all of the entries in the directory.  Note that parameter expansion is not performed on list items produced this way.
-
-If the value is a static list, it will evaluate each item and return the expanded list.  Each item may be a string (evaluated for parameter substitution), a list (recursively evaluated), or a JSON object (indicating a list function, described below).
-
-If the value is a JSON object, it is evaluated as a list function described below.
-
-h2. List functions
-
-When @run-command@ is evaluating a list (such as "command"), in addition to string parameter substitution, you can use list item functions.  In the following functions, you specify the name of a user parameter to act on (@"$(a)"@ in the first example); the value of that user parameter will be evaluated in a list context (as described above) to get the list value.  Alternatively, you can provide the list value directly inline.  As an example, the following two fragments yield the same result:
-
-<pre>
-{
-  "a": ["alice", "bob"],
-  "command": ["echo", {"foreach": "$(a)",
-                       "var": "a_var",
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-<pre>
-{
-  "command": ["echo", {"foreach": ["alice", "bob"],
-                       "var": "a_var",
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-Note: when you provide the list inline with "foreach" or "index", you must include the "var" parameter to specify the substitution variable name to use when evaluating the command fragment.
-
-You can also nest functions.  This filters @["alice", "bob", "betty"]@ on the regular expression @"b.*"@ to get the list @["bob", "betty"]@, assigns @a_var@ to each value of the list, then expands @"command"@ to get @["--something", "bob", "--something", "betty"]@.
-
-<pre>
-{
-  "command": ["echo", {"foreach": {"filter": ["alice", "bob", "betty"],
-                                   "regex": "b.*"},
-                       "var": "a_var",
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-h3. foreach
-
-The @foreach@ list item function (not to be confused with the @task.foreach@ directive) expands a command template for each item in the specified user parameter (the value of the user parameter is evaluated in a list context, as described above).  The following example will evaluate "command" to @["echo", "--something", "alice", "--something", "bob"]@:
-
-<pre>
-{
-  "a": ["alice", "bob"],
-  "command": ["echo", {"foreach": "$(a)",
-                       "var": "a_var",
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-h3. index
-
-This function extracts a single item from a list.  The value of @index@ is zero-based (i.e. the first item is at index 0, the second item at index 1, etc.).  The following example will evaluate "command" to @["echo", "--something", "bob"]@:
-
-<pre>
-{
-  "a": ["alice", "bob"],
-  "command": ["echo", {"list": "$(a)",
-                       "var": "a_var",
-                       "index": 1,
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-h3. filter
-
-Filter the list so that it only includes items that match a regular expression.  The following example will evaluate to @["echo", "bob"]@:
-
-<pre>
-{
-  "a": ["alice", "bob"],
-  "command": ["echo", {"filter": "$(a)",
-                       "regex": "b.*"}]
-}
-</pre>
-
-h3. group
-
-Generate a list of lists, where items are grouped on common subexpression match.  Items which don't match the regular expression are excluded.  In the following example, the subexpression is @(a?)@, resulting in two groups, strings that contain the letter 'a' and strings that do not.  The following example evaluates to @["echo", "--group", "alice", "carol", "dave", "--group", "bob", "betty"]@:
-
-<pre>
-{
-  "a": ["alice", "bob", "betty", "carol", "dave"],
-  "b": {"group": "$(a)",
-        "regex": "[^a]*(a?).*"},
-  "command": ["echo", {"foreach": "$(b)",
-                       "var": "b_var",
-                       "command": ["--group", "$(b_var)"]}]
-}
-</pre>
-
-h3. extract
-
-Generate a list of lists, where items are split by subexpression match.  Items which don't match the regular expression are excluded.  The following example evaluates to @["echo", "--something", "c", "a", "rol", "--something", "d", "a", "ve"]@:
-
-<pre>
-{
-  "a": ["alice", "bob", "carol", "dave"],
-  "b": {"extract": "$(a)",
-        "regex": "(.+)(a)(.*)"},
-  "command": ["echo", {"foreach": "$(b)",
-                       "var": "b_var",
-                       "command": ["--something", "$(b_var)"]}]
-}
-</pre>
-
-h3. batch
-
-Generate a list of lists, where items are split into a batch size.  If the list does not divide evenly into batch sizes, the last batch will be short.  The following example evaluates to @["echo", "--something", "alice", "bob", "--something", "carol", "dave"]@
-
-<pre>
-{
-  "a": ["alice", "bob", "carol", "dave"],
-  "command": ["echo", {"foreach":{"batch": "$(a)",
-                                  "size": 2},
-                       "var": "a_var",
-                       "command": ["--something", "$(a_var)"]}]
-}
-</pre>
-
-h2. Directives
-
-Directives alter the behavior of run-command.  All directives are optional.
-
-h3. task.cwd
-
-This directive sets the initial current working directory in which your command will run.  If @task.cwd@ is not specified, the default current working directory is @task.outdir@.
-
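-As an illustrative sketch (the @input@ parameter name and the collection shown are placeholders), the following runs its command from inside a Keep-mounted collection and writes its result to the output directory:
-
-<pre>
-{
-  "input": "887cd41e9c613463eab2f0d885c6dd96+83",
-  "task.cwd": "$(dir $(input))",
-  "command": ["/bin/sh", "-c", "md5sum *.txt > $(task.outdir)/md5sum.txt"]
-}
-</pre>
-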
-h3. task.ignore_rcode
-
-By Unix convention a task which exits with a non-zero return code is considered failed.  However, some programs (such as @grep@) return non-zero codes for conditions that should not be considered fatal errors.  Set @"task.ignore_rcode": true@ to indicate the task should always be considered a success regardless of the return code.
-
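-For example, this sketch (the parameter name and file are illustrative) treats a @grep@ task as successful even when no lines match:
-
-<pre>
-{
-  "input": "887cd41e9c613463eab2f0d885c6dd96+83/carol.txt",
-  "task.ignore_rcode": true,
-  "command": ["grep", "hello", "$(file $(input))"]
-}
-</pre>
-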
-h3. task.stdin and task.stdout
-
-Provide standard input and standard output redirection.
-
-@task.stdin@ must evaluate to a path to a file to be bound to the standard input stream of the command.  When command describes a Unix pipeline, this goes into the first command.
-
-@task.stdout@ specifies the desired file name in the output directory to save the content of standard output.  When command describes a Unix pipeline, this captures the output of the last command.
-
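-A sketch combining both directives (the @input@ parameter and file are illustrative): the input file is fed to the first command of the pipeline on standard input, and the standard output of the last command is saved as @variants.tsv@ in the output collection:
-
-<pre>
-{
-  "input": "c1bad4b39ca5a924e481008009d94e32+210/var-GS000016015-ASM.tsv.bz2",
-  "task.stdin": "$(file $(input))",
-  "task.stdout": "variants.tsv",
-  "command": [["bzcat"], ["grep", "-v", "^#"]]
-}
-</pre>
-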
-h3. task.env
-
-Set environment variables for the command.  Accepts an object mapping environment variables to the desired values.  Parameter substitution is performed on values, but not on the environment variable names themselves.  Example usage:
-
-<pre>
-{
-  "command": ["/bin/sh", "-c", "echo $MY_ENV_VAR"],
-  "task.env": {
-    "MY_ENV_VAR": "Hello world!"
-  }
-}
-</pre>
-
-h3. task.vwd
-
-Background: because Keep collections are read-only, this does not play well with certain tools that expect to be able to write their outputs alongside their inputs (such as tools that generate indexes that are closely associated with the original file.)  The run-command's solution to this is the "virtual working directory".
-
-@task.vwd@ specifies a Keep collection with the starting contents of the output directory.  @run-command@ will populate @task.outdir@ with directories and symlinks to mirror the contents of the @task.vwd@ collection.  Your command will then be able to both access its input files and write its output files from within @task.outdir@.  When the command completes, run-command will write the contents of the output directory, which will include the output of your command as well as symlinks to files in the starting collection.  Note that files from the starting collection remain read-only and cannot be altered, but may be deleted or renamed.
-
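-An illustrative sketch (the collection hash and file names are placeholders): the output directory starts out mirroring an existing collection, and the command adds a new file alongside those inputs:
-
-<pre>
-{
-  "task.vwd": "887cd41e9c613463eab2f0d885c6dd96+83",
-  "command": ["/bin/sh", "-c", "wc -l *.txt > line_counts.txt"]
-}
-</pre>
-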
-h3. task.foreach
-
-Using @task.foreach@, you can run your command concurrently over large datasets.
-
-@task.foreach@ takes the names of one or more user-defined parameters.  The values of these parameters are evaluated in a list context.  @run-command@ then generates tasks based on the Cartesian product (i.e. all combinations) of the input lists.  The outputs of all tasks are merged to create the final output collection.  Note that if two tasks output a file in the same directory with the same name, that file will be concatenated in the final output.  In the following example, three tasks will be created for the "echo" command, based on the contents of user parameter "a":
-
-<pre>
-{
-  "command": ["echo", "$(a)"],
-  "task.foreach": "a",
-  "a": ["alice", "bob", "carol"]
-}
-</pre>
-
-This evaluates to the commands:
-<notextile>
-<pre>
-["echo", "alice"]
-["echo", "bob"]
-["echo", "carol"]
-</pre>
-</notextile>
-
-You can also specify multiple parameters:
-
-<pre>
-{
-  "a": ["alice", "bob"],
-  "b": ["carol", "dave"],
-  "task.foreach": ["a", "b"],
-  "command": ["echo", "$(a)", "$(b)"]
-}
-</pre>
-
-This evaluates to the commands:
-
-<pre>
-["echo", "alice", "carol"]
-["echo", "alice", "dave"]
-["echo", "bob", "carol"]
-["echo", "bob", "dave"]
-</pre>
-
-h1. Examples
-
-The following is a single task pipeline using @run-command@ to run the bwa alignment tool to align a single paired-end read fastq sample.  The input to this pipeline is the reference genome and a collection consisting of two fastq files for the read pair.
-
-<notextile>{% code 'run_command_simple_example' as javascript %}</notextile>
-
-The following is a concurrent task pipeline using @run-command@ to run the bwa alignment tool to align a set of fastq reads over multiple samples.  The input to this pipeline is the reference genome and a collection consisting of subdirectories for each sample, with each subdirectory containing pairs of fastq files for each set of reads.
-
-<notextile>{% code 'run_command_foreach_example' as javascript %}</notextile>
diff --git a/doc/user/topics/running-pipeline-command-line.html.textile.liquid b/doc/user/topics/running-pipeline-command-line.html.textile.liquid
deleted file mode 100644 (file)
index ffa5710..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Running an Arvados pipeline"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'crunch1only_begin' %}
-If the Jobs API is not available, use the "Common Workflow Language":{{site.baseurl}}/user/cwl/cwl-runner.html instead.
-{% include 'crunch1only_end' %}
-
-This tutorial demonstrates how to use the command line to run the same pipeline as described in "running a pipeline using Workbench.":{{site.baseurl}}/user/tutorials/tutorial-workflow-workbench.html
-
-{% include 'tutorial_expectations' %}
-{% include 'tutorial_cluster_name' %}
-
-When you use the command line, you must use Arvados unique identifiers to refer to objects.  The identifiers in this example correspond to the following Arvados objects:
-
-* <i class="fa fa-fw fa-gear"></i> "Tutorial align using bwa mem (qr1hi-p5p6p-itzkwxblfermlwv)":{{site.arvados_workbench_host}}/pipeline_templates/qr1hi-p5p6p-itzkwxblfermlwv
-* <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":{{site.arvados_workbench_host}}/collections/2463fa9efeb75e099685528b3b9071e0+438
-* <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":{{site.arvados_workbench_host}}/collections/3229739b505d2b878b62aed09895a55a+142
-
-Use @arv pipeline run@ to run the pipeline, supplying the inputs to the bwa-mem component on the command line:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv pipeline run --run-pipeline-here --template qr1hi-p5p6p-itzkwxblfermlwv bwa-mem::reference_collection=2463fa9efeb75e099685528b3b9071e0+438 bwa-mem::sample=3229739b505d2b878b62aed09895a55a+142</span>
-
-2014-07-25 18:05:26 +0000 -- pipeline_instance qr1hi-d1hrv-d14trje19pna7f2
-bwa-mem qr1hi-8i9sb-67n1qvsronmd2z6 queued 2014-07-25T18:05:25Z
-
-2014-07-25 18:05:36 +0000 -- pipeline_instance qr1hi-d1hrv-d14trje19pna7f2
-bwa-mem qr1hi-8i9sb-67n1qvsronmd2z6 {:done=>0, :running=>1, :failed=>0, :todo=>0}
-
-2014-07-25 18:05:46 +0000 -- pipeline_instance qr1hi-d1hrv-d14trje19pna7f2
-bwa-mem qr1hi-8i9sb-67n1qvsronmd2z6 49bae1066f4ebce72e2587a3efa61c7d+88
-</code></pre>
-</notextile>
-
-This instantiates your pipeline and displays periodic status reports in your terminal window. The new pipeline instance will also show up on the Workbench Dashboard.
-
-
-@arv pipeline run@ submits a job for each pipeline component as soon as the component's inputs are known (i.e., any dependencies are satisfied). It terminates when there is no work left to do: this means either all components are satisfied and all jobs have completed successfully, _or_ one or more jobs have failed and it is therefore unproductive to submit any further jobs.
-
-The Keep locators of the output of the @bwa-mem@ components are available from the last status report shown above:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv keep ls -s 49bae1066f4ebce72e2587a3efa61c7d+88</span>
-     29226 ./HWI-ST1027_129_D0THKACXX.1_1.sam
-</code></pre>
-</notextile>
-
-h2. Re-using existing jobs and outputs
-
-When satisfying a pipeline component that is not marked as nondeterministic in the pipeline template, @arv pipeline run@ checks for a previously submitted job that satisfies the component's requirements. If such a job is found, @arv pipeline run@ uses the existing job rather than submitting a new one. Usually this is a safe way to conserve time and compute resources. In some cases it's desirable to re-run jobs with identical specifications (e.g., to demonstrate that a job or entire pipeline thought to be repeatable is in fact repeatable). For such cases, job re-use features can be disabled entirely by passing the @--no-reuse@ flag to the @arv pipeline run@ command.
diff --git a/doc/user/topics/tutorial-parallel.html.textile.liquid b/doc/user/topics/tutorial-parallel.html.textile.liquid
deleted file mode 100644 (file)
index 7d308dc..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Concurrent Crunch tasks"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-In the previous tutorials, we used @arvados.job_setup.one_task_per_input_file()@ to automatically parallelize a job by creating a separate task per input file.  For some types of jobs, you may need to split the work up differently, for example creating tasks to process different segments of a single large file.  This tutorial will demonstrate how to create Crunch tasks directly.
-
-Start by entering the @crunch_scripts@ directory of your Git repository:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd $USER/crunch_scripts</span>
-</code></pre>
-</notextile>
-
-Next, using @nano@ or your favorite Unix text editor, create a new file called @concurrent-hash.py@ in the @crunch_scripts@ directory.
-
-notextile. <pre>~/$USER/crunch_scripts$ <code class="userinput">nano concurrent-hash.py</code></pre>
-
-Add the following code to compute the MD5 hash of each file in a collection:
-
-<notextile> {% code 'concurrent_hash_script_py' as python %} </notextile>
-
-Make the file executable:
-
-notextile. <pre><code>~/$USER/crunch_scripts$ <span class="userinput">chmod +x concurrent-hash.py</span></code></pre>
-
-Add the file to the Git staging area, commit, and push:
-
-<notextile>
-<pre><code>~/$USER/crunch_scripts$ <span class="userinput">git add concurrent-hash.py</span>
-~/$USER/crunch_scripts$ <span class="userinput">git commit -m"concurrent hash"</span>
-~/$USER/crunch_scripts$ <span class="userinput">git push origin master</span>
-</code></pre>
-</notextile>
-
-You should now be able to run your new script using Crunch, with "script" referring to our new "concurrent-hash.py" script.  We will use a different input from our previous examples.  We will use @887cd41e9c613463eab2f0d885c6dd96+83@ which consists of three files, "alice.txt", "bob.txt" and "carol.txt" (the example collection used previously in "fetching data from Arvados using Keep":{{site.baseurl}}/user/tutorials/tutorial-keep.html#dir).
-
-<notextile>
-<pre><code>~/$USER/crunch_scripts$ <span class="userinput">cat &gt;~/the_job &lt;&lt;EOF
-{
- "script": "concurrent-hash.py",
- "repository": "$USER/$USER",
- "script_version": "master",
- "script_parameters":
- {
-  "input": "887cd41e9c613463eab2f0d885c6dd96+83"
- }
-}
-EOF</span>
-~/$USER/crunch_scripts$ <span class="userinput">arv job create --job "$(cat ~/the_job)"</span>
-{
- ...
- "uuid":"qr1hi-xxxxx-xxxxxxxxxxxxxxx"
- ...
-}
-~/$USER/crunch_scripts$ <span class="userinput">arv job get --uuid qr1hi-xxxxx-xxxxxxxxxxxxxxx</span>
-{
- ...
- "output":"e2ccd204bca37c77c0ba59fc470cd0f7+162",
- ...
-}
-</code></pre>
-</notextile>
-
-(Your shell should automatically fill in @$USER@ with your login name.  The job JSON that gets saved should have @"repository"@ pointed at your personal Git repository.)
-
-Because the job ran concurrent tasks, each instance of concurrent-hash creates a separate @md5sum.txt@ as output.  Arvados automatically collates these files into a single collection, which is the output of the job:
-
-<notextile>
-<pre><code>~/$USER/crunch_scripts$ <span class="userinput">arv keep ls e2ccd204bca37c77c0ba59fc470cd0f7+162</span>
-./md5sum.txt
-~/$USER/crunch_scripts$ <span class="userinput">arv-get e2ccd204bca37c77c0ba59fc470cd0f7+162/md5sum.txt</span>
-0f1d6bcf55c34bed7f92a805d2d89bbf alice.txt
-504938460ef369cd275e4ef58994cffe bob.txt
-8f3b36aff310e06f3c5b9e95678ff77a carol.txt
-</code></pre>
-</notextile>
diff --git a/doc/user/topics/tutorial-trait-search.html.textile.liquid b/doc/user/topics/tutorial-trait-search.html.textile.liquid
deleted file mode 100644 (file)
index d396802..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Querying the Metadata Database"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin_warning' %}
-The humans, specimens and traits tables are deprecated and will be removed in a future release.  The recommended way to store and search on user-defined metadata is using the "properties" field of Arvados resources.
-{% include 'notebox_end' %}
-
-This tutorial introduces the Arvados Metadata Database.  The Metadata Database stores information about files in Keep.  This example will use the Python SDK to find public WGS (Whole Genome Sequencing) data for people who have reported a certain medical condition.
-
-{% include 'tutorial_expectations' %}
-
-In the tutorial examples, three angle brackets (&gt;&gt;&gt;) will be used to denote code to enter at the interactive Python prompt.
-
-Start by running Python.
-
-<notextile>
-<pre><code>~$ <span class="userinput">python</span>
-Python 2.7.3 (default, Jan  2 2013, 13:56:14)
-[GCC 4.7.2] on linux2
-Type "help", "copyright", "credits" or "license" for more information.
-&gt;&gt;&gt;
-</code></pre>
-</notextile>
-
-If everything is set up correctly, you will be able to import the arvados SDK.
-
-notextile. <pre><code>&gt;&gt;&gt; <span class="userinput">import arvados</span></code></pre>
-
-This tutorial will also use the regular expression (re) python module:
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">import re</span>
-</code></pre>
-</notextile>
-
-h2. Finding traits
-
-notextile. <pre><code>&gt;&gt;&gt; <span class="userinput">all_traits = arvados.api().traits().list(limit=1000).execute()</span></code></pre>
-
-* @arvados.api()@ gets an object that provides access to the Arvados API server
-* @.traits()@ gets an object that provides access to the "traits" resource on the Arvados API server
-* @.list(limit=1000)@ constructs a query to list all elements of the "traits" resource, with a limit of 1000 entries returned
-* @.execute()@ executes the query and returns the result, which we assign to "all_traits"
-
-notextile. <pre><code>&gt;&gt;&gt; <span class="userinput">cancer_traits = filter(lambda t: re.search('cancer', t['name']), all_traits['items'])</span></code></pre>
-
-* @lambda t: re.search('cancer', t['name'])@ is an inline function that takes a parameter @t@ and uses a simple regular expression to test if @t['name']@ contains the substring 'cancer'
-* @all_traits['items']@ is the input sequence of traits
-* @filter@ tests each element @t@ and constructs a new sequence consisting only of the elements that pass the filter
-* @cancer_traits@ gets the result of @filter@
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">for t in cancer_traits: print(t['uuid'], t['name'])</span>
-...
-qr1hi-q1cn2-8q57g2diohwnzm0 Cervical cancer
-qr1hi-q1cn2-vqp4243janpjbyj Breast cancer
-qr1hi-q1cn2-v6usijujcpwqrn1 Non-melanoma skin cancer
-...
-</code></pre>
-</notextile>
-
-In this tutorial we will use the "Non-melanoma skin cancer" trait with uuid @qr1hi-q1cn2-v6usijujcpwqrn1@.
-
-notextile. <pre><code>&gt;&gt;&gt; <span class="userinput">non_melanoma_cancer = 'qr1hi-q1cn2-v6usijujcpwqrn1'</code></pre>
-
-h2. Finding humans with the selected trait
-
-We query the "links" resource to find humans that report the selected trait.  Links are directional connections between Arvados data items, for example, from a human to their reported traits.
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">trait_filter = [
-    ['link_class', '=', 'human_trait'],
-    ['tail_uuid', 'is_a', 'arvados#human'],
-    ['head_uuid', '=', non_melanoma_cancer],
-  ]
-</code></pre>
-</notextile>
-
-* @['link_class', '=', 'human_trait']@ selects links that connect phenotype traits to individuals in the database.
-* @['tail_uuid', 'is_a', 'arvados#human']@ requires that the "tail" of the link be a "human" database object.
-* @['head_uuid', '=', non_melanoma_cancer]@ requires that the "head" of the link be the "trait" database object @non_melanoma_cancer@.
-
-The query will return links that match all three conditions.
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">trait_links = arvados.api().links().list(limit=1000, filters=trait_filter).execute()</span>
-</code></pre>
-</notextile>
-
-* @arvados.api()@ gets an object that provides access to the Arvados API server
-* @.links()@ gets an object that provides access to the "links" resource on the Arvados API server
-* @.list(limit=1000, filters=trait_filter)@ constructs a query to list elements of the "links" resource that match the criteria discussed above, with a limit of 1000 entries returned
-* @.execute()@ executes the query and returns the result, which we assign to "trait_links"
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">human_uuids = map(lambda l: l['tail_uuid'], trait_links['items'])</span>
-&gt;&gt;&gt; <span class="userinput">human_uuids</span>
-[u'1h9kt-7a9it-c0uqa4kcdh29wdf', u'1h9kt-7a9it-x4tru6mn40hc6ah',
-u'1h9kt-7a9it-yqb8m5s9cpy88i8', u'1h9kt-7a9it-46sm75w200ngwny',
-u'1h9kt-7a9it-gx85a4tdkpzsg3w', u'1h9kt-7a9it-8cvlaa8909lgeo9',
-u'1h9kt-7a9it-as37qum2pq8vizb', u'1h9kt-7a9it-14fph66z2baqxb9',
-u'1h9kt-7a9it-e9zc7i4crmw3v69', u'1h9kt-7a9it-np7f35hlijlxdmt',
-u'1h9kt-7a9it-j9hqyjwbvo9cojn', u'1h9kt-7a9it-lqxdtm1gynmsv13',
-u'1h9kt-7a9it-zkhhxjfg2o22ywq', u'1h9kt-7a9it-nsjoxqd33lzldw9',
-u'1h9kt-7a9it-ytect4smzcgd4kg', u'1h9kt-7a9it-y6tl353b3jc4tos',
-u'1h9kt-7a9it-98f8qave4f8vbs5', u'1h9kt-7a9it-gd72sh15q0p4wq3',
-u'1h9kt-7a9it-zlx25dscak94q9h', u'1h9kt-7a9it-8gronw4rbgmim01',
-u'1h9kt-7a9it-wclfkjcb23tr5es', u'1h9kt-7a9it-rvp2qe7szfz4dy6',
-u'1h9kt-7a9it-50iffhmpzsktwjm', u'1h9kt-7a9it-ul412id5y31a5o8',
-u'1h9kt-7a9it-732kwkfzylmt4ik', u'1h9kt-7a9it-v9zqxegpblsbtai',
-u'1h9kt-7a9it-kmaraqduit1v5wd', u'1h9kt-7a9it-t1nwtlo1hru5vvq',
-u'1h9kt-7a9it-q3w6j9od4ibpoyl', u'1h9kt-7a9it-qz8vzkuuz97ezwv',
-u'1h9kt-7a9it-t1v8sjz6dm9jmjf', u'1h9kt-7a9it-qe8wrbyvuqs5jew']
-</code></pre>
-</notextile>
-
-* @lambda l: l['tail_uuid']@ is an inline function that returns the 'tail_uuid' attribute of 'l'
-* @trait_links['items']@ is the input set from the query
-* @map@ converts each item in a sequence into a different item using the embedded function, in this case producing a sequence of UUIDs that refer to humans with the specified trait.
-
-h2. Find Personal Genome Project identifiers from Arvados UUIDs
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">human_filters = [
-    ["link_class", "=", "identifier"],
-    ["head_uuid", "in", human_uuids]
-  ]</span>
-&gt;&gt;&gt; <span class="userinput">pgpid_links = arvados.api('v1').links().list(limit=1000, filters=human_filters).execute()</span>
-&gt;&gt;&gt; <span class="userinput">map(lambda l: l['name'], pgpid_links['items'])</span>
-[u'hu01024B', u'hu11603C', u'hu15402B', u'hu174334', u'hu1BD549', u'hu237A50',
- u'hu34A921', u'hu397733', u'hu414115', u'hu43860C', u'hu474789', u'hu553620',
- u'hu56B3B6', u'hu5917F3', u'hu599905', u'hu5E55F5', u'hu602487', u'hu633787',
- u'hu68F245', u'hu6C3F34', u'hu7260DD', u'hu7A2F1D', u'hu94040B', u'hu9E356F',
- u'huAB8707', u'huB1FD55', u'huB4883B', u'huD09050', u'huD09534', u'huD3A569',
- u'huDF04CC', u'huE2E371']
-</code></pre>
-</notextile>
-
-These PGP IDs let us find public profiles, for example:
-
-* "https://my.pgp-hms.org/profile/huE2E371":https://my.pgp-hms.org/profile/huE2E371
-* "https://my.pgp-hms.org/profile/huDF04CC":https://my.pgp-hms.org/profile/huDF04CC
-* ...
-
-h2. Find genomic data from specific humans
-
-Now we want to find collections in Keep that were provided by these humans.  We search the "links" resource for "provenance" links that point to entries in the list of humans with the non-melanoma skin cancer trait:
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">provenance_links = arvados.api().links().list(limit=1000, filters=[
-    ["link_class", "=", "provenance"],
-    ["name", "=", "provided"],
-    ["tail_uuid", "in", human_uuids]
-  ]).execute()
-collection_uuids = map(lambda l: l['head_uuid'], provenance_links['items'])
-
-# build map of human uuid -> PGP ID
-pgpid = {}
-for pgpid_link in pgpid_links['items']:
-  pgpid[pgpid_link['head_uuid']] = pgpid_link['name']
-
-# build map of collection uuid -> PGP ID
-for p_link in provenance_links['items']:
-  pgpid[p_link['head_uuid']] = pgpid[p_link['tail_uuid']]
-
-# get details (e.g., list of files) of each collection
-collections = arvados.api('v1').collections().list(filters=[
-    ["uuid", "in", collection_uuids]
-  ]).execute()
-
-# print PGP public profile links with file locators
-for c in collections['items']:
-  for f in c['files']:
-    print "https://my.pgp-hms.org/profile/%s %s %s%s" % (pgpid[c['uuid']], c['uuid'], ('' if f[0] == '.' else f[0]+'/'), f[1])
-</span>
-https://my.pgp-hms.org/profile/hu43860C a58dca7609fa84c8c38a7e926a97b2fc var-GS00253-DNA_A01_200_37-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huB1FD55 ea30eb9e46eedf7f05ed6e348c2baf5d var-GS000010320-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huDF04CC 4ab0df8f22f595d1747a22c476c05873 var-GS000010427-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu7A2F1D 756d0ada29b376140f64e7abfe6aa0e7 var-GS000014566-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu553620 7ed4e425bb1c7cc18387cbd9388181df var-GS000015272-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huD09534 542112e210daff30dd3cfea4801a9f2f var-GS000016374-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu599905 33a9f3842b01ea3fdf27cc582f5ea2af var-GS000016015-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu43860C a58dca7609fa84c8c38a7e926a97b2fc+302 var-GS00253-DNA_A01_200_37-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huB1FD55 ea30eb9e46eedf7f05ed6e348c2baf5d+291 var-GS000010320-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huDF04CC 4ab0df8f22f595d1747a22c476c05873+242 var-GS000010427-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu7A2F1D 756d0ada29b376140f64e7abfe6aa0e7+242 var-GS000014566-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu553620 7ed4e425bb1c7cc18387cbd9388181df+242 var-GS000015272-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/huD09534 542112e210daff30dd3cfea4801a9f2f+242 var-GS000016374-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu599905 33a9f3842b01ea3fdf27cc582f5ea2af+242 var-GS000016015-ASM.tsv.bz2
-https://my.pgp-hms.org/profile/hu599905 d6e2e57cd60ba5979006d0b03e45e726+81 Witch_results.zip
-https://my.pgp-hms.org/profile/hu553620 ea4f2d325592a1272f989d141a917fdd+85 Devenwood_results.zip
-https://my.pgp-hms.org/profile/hu7A2F1D 4580f6620bb15b25b18373766e14e4a7+85 Innkeeper_results.zip
-https://my.pgp-hms.org/profile/huD09534 fee37be9440b912eb90f5e779f272416+82 Hallet_results.zip
-</code></pre>
-</notextile>
-
-h3. Search for a variant
-
-Now we will use crunch to issue a 'grep' job to look for variant rs1126809 in each of the "var-" files (these contain variant calls from WGS data).
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">job = {}
-for c in collections['items']:
-  if [] != filter(lambda f: re.search('^var-.*\.tsv\.bz2', f[1]), c['files']):
-    job[c['uuid']] = arvados.api('v1').jobs().create(body={
-      'script': 'grep',
-      'script_parameters': {'input': c['uuid'], 'pattern': "rs1126809\\b"},
-      'script_version': 'e7aeb42'
-    }).execute()
-    print "%s %s" % (pgpid[c['uuid']], job[c['uuid']]['uuid'])
-</span>
-hu43860C qr1hi-8i9sb-wbf3uthbhkcy8ji
-huB1FD55 qr1hi-8i9sb-scklkiy8dc27dab
-huDF04CC qr1hi-8i9sb-pg0w4rfrwfd9srg
-hu7A2F1D qr1hi-8i9sb-n7u0u0rj8b47168
-hu553620 qr1hi-8i9sb-k7gst7vyhg20pt1
-huD09534 qr1hi-8i9sb-4w65pm48123fte5
-hu599905 qr1hi-8i9sb-wmwa5b5r3eghnev
-hu43860C qr1hi-8i9sb-j1mngmakdh8iv9o
-huB1FD55 qr1hi-8i9sb-4j6ehiatcolaoxb
-huDF04CC qr1hi-8i9sb-n6lcmcr3lowqr5u
-hu7A2F1D qr1hi-8i9sb-0hwsdtojfcxjo40
-hu553620 qr1hi-8i9sb-cvvqzqea7jhwb0i
-huD09534 qr1hi-8i9sb-d0y0qtzuwzbrjj0
-hu599905 qr1hi-8i9sb-i9ec9g8d7rt70xg
-</code></pre>
-</notextile>
-
-
-Monitor job progress by refreshing the Jobs page in Workbench, or by using the API:
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">map(lambda j: arvados.api('v1').jobs().get(uuid=j['uuid']).execute()['success'], job.values())
-[None, True, None, None, None, None, None, None, None, None, None, None, None, None]
-</code></pre>
-</notextile>
-
-Unfinished jobs will appear as None, failed jobs as False, and completed jobs as True.
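-
-Rather than polling by hand, you could wait for all jobs to finish with a small loop along these lines (a sketch only; the 30 second sleep interval is an arbitrary choice):
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">import time
-while True:
-  done = [arvados.api('v1').jobs().get(uuid=j['uuid']).execute()['success'] for j in job.values()]
-  if all(s is not None for s in done):
-    break
-  time.sleep(30)
-</span>
-</code></pre>
-</notextile>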
-
-After the jobs have completed, check output file sizes.
-
-<notextile>
-<pre><code>&gt;&gt;&gt; <span class="userinput">for collection_uuid in job:
-  job_uuid = job[collection_uuid]['uuid']
-  job_output = arvados.api('v1').jobs().get(uuid=job_uuid).execute()['output']
-  output_files = arvados.api('v1').collections().get(uuid=job_output).execute()['files']
-  # Test the output size.  If greater than zero, that means 'grep' found the variant
-  if output_files[0][2] > 0:
-    print("%s has variant rs1126809" % (pgpid[collection_uuid]))
-  else:
-    print("%s does not have variant rs1126809" % (pgpid[collection_uuid]))
-</span>
-hu553620 does not have variant rs1126809
-hu43860C does not have variant rs1126809
-hu599905 has variant rs1126809
-huD09534 has variant rs1126809
-hu553620 does not have variant rs1126809
-huB1FD55 does not have variant rs1126809
-huDF04CC has variant rs1126809
-hu7A2F1D has variant rs1126809
-hu7A2F1D has variant rs1126809
-hu599905 has variant rs1126809
-huDF04CC has variant rs1126809
-huB1FD55 does not have variant rs1126809
-huD09534 has variant rs1126809
-hu43860C does not have variant rs1126809
-</code></pre>
-</notextile>
-
-Thus, of the 14 WGS results available for PGP participants reporting non-melanoma skin cancer, 8 include the rs1126809 variant.
diff --git a/doc/user/tutorials/running-external-program.html.textile.liquid b/doc/user/tutorials/running-external-program.html.textile.liquid
deleted file mode 100644 (file)
index a4e58b8..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Writing a pipeline template"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-This tutorial demonstrates how to construct a two stage pipeline template that uses the "bwa mem":http://bio-bwa.sourceforge.net/ tool to produce a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file, then uses the "Picard SortSam tool":http://picard.sourceforge.net/command-line-overview.shtml#SortSam to produce a BAM (Binary Alignment/Map) file.
-
-{% include 'tutorial_expectations' %}
-
-Use the following command to create an empty template using @arv create pipeline_template@:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv create pipeline_template</span></code></pre>
-</notextile>
-
-This will open the template record in an interactive text editor (as specified by @$EDITOR@ or @$VISUAL@; otherwise it defaults to @nano@).  Now, update the contents of the editor with the following content:
-
-<notextile>{% code 'tutorial_bwa_sortsam_pipeline' as javascript %}</notextile>
-
-* @"name"@ is a human-readable name for the pipeline.
-* @"components"@ is a set of scripts or commands that make up the pipeline.  Each component is given an identifier (@"bwa-mem"@ and @"SortSam"@) in this example).
-** Each entry in components @"components"@ is an Arvados job submission.  For more information about individual jobs, see the "job resource reference.":{{site.baseurl}}/api/methods/jobs.html
-* @"repository"@, @"script_version"@, and @"script"@ indicate that we intend to use the external @"run-command"@ tool wrapper that is part of the Arvados.  These parameters are described in more detail in "Writing a script":tutorial-firstscript.html.
-* @"runtime_constraints"@ describes runtime resource requirements for the component.
-** @"docker_image"@ specifies the "Docker":https://www.docker.com/ runtime environment in which to run the job.  The Docker image @"bcosc/arv-base-java"@ supplied here has the Java runtime environment, bwa, and samtools installed.
-** @"arvados_sdk_version"@ specifies a version of the Arvados SDK to load alongside the job's script. The example uses 'master'. If you would like to use a specific version of the sdk, you can find it in the "Arvados Python sdk repository":https://dev.arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*.
-* @"script_parameters"@ describes the component parameters.
-** @"command"@ is the actual command line to invoke the @bwa@ and then @SortSam@.  The notation @$()@ denotes macro substitution commands evaluated by the run-command tool wrapper.
-** @"task.stdout"@ indicates that the output of this command should be captured to a file.
-** @$(node.cores)@ evaluates to the number of cores available on the compute node at the time the command is run.
-** @$(tmpdir)@ evaluates to the local path of a temporary directory the command should use for scratch data.
-** @$(reference_collection)@ evaluates to the script_parameter @"reference_collection"@
-** @$(dir $(...))@ constructs a local path to a directory representing the supplied Arvados collection.
-** @$(file $(...))@ constructs a local path to a given file within the supplied Arvados collection.
-** @$(glob $(...))@ searches the specified path based on a file glob pattern and evaluates to the first result.
-** @$(basename $(...))@ evaluates to the supplied path with the leading path portion and trailing filename extensions stripped.
-* @"output_of"@ indicates that the @output@ of the @bwa-mem@ component should be used as the @"input"@ script parameter of @SortSam@.  Arvados uses these dependencies between components to automatically determine the correct order to run them.
-
-When using @run-command@, the tool should write its output to the current working directory.  The output will be automatically uploaded to Keep when the job completes.
-
-See the "run-command reference":{{site.baseurl}}/user/topics/run-command.html for more information about using @run-command@.
-
-*Note:* To make a job reproducible without re-computation, you need to pin these parameters to specific hashes. Using a symbolic version such as @master@ in @"arvados_sdk_version"@ resolves to the latest version hash, so Arvados will re-compute your job whenever the SDK is updated.
-* @"arvados_sdk_version"@ : The latest version can be found on the "Arvados Python sdk repository":https://dev.arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*.
-* @"script_version"@ : The current version of your script in your git repository can be found by using the following command:
-
-<notextile>
-<pre><code>~$ <span class="userinput">git rev-parse HEAD</span></code></pre>
-</notextile>
-
-* @"docker_image"@ : The docker image hash used is found on the "Collection page":https://playground.arvados.org/collections/qr1hi-4zz18-dov6im679g3jr1n as the *Content address*.
-
-h2. Running your pipeline
-
-Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":{{site.arvados_workbench_host}}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-workflow-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
-
-Test data is available in the "Arvados Tutorial":{{site.arvados_workbench_host}}/projects/qr1hi-j7d0g-u7zg1qdaowykd8d project:
-
-* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":{{site.arvados_workbench_host}}/collections/2463fa9efeb75e099685528b3b9071e0+438 for the "reference_collection" parameter
-* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":{{site.arvados_workbench_host}}/collections/3229739b505d2b878b62aed09895a55a+142 for the "sample" parameter
-
-For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/methods/pipeline_templates.html
-
-h2. Re-using your pipeline run
-
-Arvados allows users to re-use jobs that have the same inputs in order to save computing time and resources. Users are able to change a job downstream without re-computing earlier jobs. This section shows which version control parameters should be tuned to make sure Arvados will not re-compute your jobs.
-
-Note: Job reuse can only happen if none of the input collections change.
-
-* @"arvados_sdk_version"@ : The arvados_sdk_version parameter is used to download the specific version of the Arvados sdk into the docker image. The latest version can be found in the "Arvados Python sdk repository":https://dev.arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*. Make sure you set this to the same version as the previous run that you are trying to reuse.
-* @"script_version"@ : The script_version is the commit hash of the git branch that the crunch script resides in. This information can be found in your git repository by using the following command:
-
-<notextile>
-<pre><code>~$ <span class="userinput">git rev-parse HEAD</span></code></pre>
-</notextile>
-
-* @"docker_image"@ : This specifies the "Docker":https://www.docker.com/ runtime environment where jobs run their scripts. Docker version control is similar to git, and you can commit and push changes to your images. You must re-use the docker image hash from the previous run to use the same image. It can be found on the "Collection page":https://playground.arvados.org/collections/qr1hi-4zz18-dov6im679g3jr1n as the *Content address* or the *docker_image_locator* in a job's metadata.
diff --git a/doc/user/tutorials/tutorial-firstscript.html.textile.liquid b/doc/user/tutorials/tutorial-firstscript.html.textile.liquid
deleted file mode 100644 (file)
index 3937698..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
----
-layout: default
-navsection: userguide
-navmenu: Tutorials
-title: "Writing a Crunch script"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-This tutorial demonstrates how to write a script using the Arvados Python SDK.  The Arvados SDK supports access to advanced features not available using the @run-command@ wrapper, such as scheduling concurrent tasks across nodes.
-
-{% include 'tutorial_expectations' %}
-
-This tutorial uses @$USER@ to denote your username.  Replace @$USER@ with your user name in all the following examples.
-
-Start by creating a directory called @tutorial@ in your home directory.  Next, create a subdirectory called @crunch_scripts@ and change to that directory:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span>
-~$ <span class="userinput">mkdir -p tutorial/crunch_scripts</span>
-~$ <span class="userinput">cd tutorial/crunch_scripts</span></code></pre>
-</notextile>
-
-Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory.
-
-notextile. <pre>~/tutorial/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
-
-Add the following code to compute the MD5 hash of each file in a collection:
-
-<notextile> {% code 'tutorial_hash_script_py' as python %} </notextile>
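-
-(The included file is not reproduced in this page's source.  As rough orientation, the script has approximately the following shape; this is a paraphrased sketch, not the exact contents of @tutorial_hash_script_py@.)
-
-<notextile>
-<pre><code>#!/usr/bin/env python
-
-import hashlib
-import os
-import arvados
-
-# Parallelize the job: queue one new task per file in the "input" collection.
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
-                                          input_as_path=True)
-
-# Get the object representing the current task.
-this_task = arvados.current_task()
-
-# The task parameter is the collection locator plus the path within the collection.
-input_id, input_path = this_task['parameters']['input'].split('/', 1)
-
-# Hash the file contents block by block.
-digestor = hashlib.new('md5')
-with arvados.CollectionReader(input_id).open(input_path) as input_file:
-    for buf in input_file.readall():
-        digestor.update(buf)
-
-# Write the hash and file name to md5sum.txt in a new output collection.
-out = arvados.CollectionWriter()
-with out.open('md5sum.txt') as out_file:
-    out_file.write("{} {}/{}\n".format(digestor.hexdigest(), input_id,
-                                       os.path.normpath(input_path)))
-
-# Commit the output to Keep and record it as this task's output.
-this_task.set_output(out.finish())
-</code></pre>
-</notextile>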
-
-Make the file executable:
-
-notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
-
-Next, create a submission job record.  This describes a specific invocation of your script:
-
-<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">cat &gt;~/the_job &lt;&lt;EOF
-{
- "repository":"",
- "script":"hash.py",
- "script_version":"$HOME/tutorial",
- "script_parameters":{
-   "input":"c1bad4b39ca5a924e481008009d94e32+210"
- }
-}
-EOF</span>
-</code></pre>
-</notextile>
-
-You can now run your script on your local workstation or VM using @arv-crunch-job@:
-
-<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">arv-crunch-job --job "$(cat ~/the_job)"</span>
-2014-08-06_15:16:22 qr1hi-8i9sb-qyrat80ef927lam 14473  check slurm allocation
-2014-08-06_15:16:22 qr1hi-8i9sb-qyrat80ef927lam 14473  node localhost - 1 slots
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  start
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  script hash.py
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  script_version $HOME/tutorial
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  script_parameters {"input":"c1bad4b39ca5a924e481008009d94e32+210"}
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  runtime_constraints {"max_tasks_per_node":0}
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  start level 0
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 0 done, 0 running, 1 todo
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473 0 job_task qr1hi-ot0gb-lptn85mwkrn9pqo
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473 0 child 14478 started on localhost.1
-2014-08-06_15:16:23 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 0 done, 1 running, 0 todo
-2014-08-06_15:16:24 qr1hi-8i9sb-qyrat80ef927lam 14473 0 stderr crunchstat: Running [stdbuf --output=0 --error=0 /home/$USER/tutorial/crunch_scripts/hash.py]
-2014-08-06_15:16:24 qr1hi-8i9sb-qyrat80ef927lam 14473 0 child 14478 on localhost.1 exit 0 signal 0 success=true
-2014-08-06_15:16:24 qr1hi-8i9sb-qyrat80ef927lam 14473 0 success in 1 seconds
-2014-08-06_15:16:24 qr1hi-8i9sb-qyrat80ef927lam 14473 0 output
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473  wait for last 0 children to finish
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 1 done, 0 running, 1 todo
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473  start level 1
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 1 done, 0 running, 1 todo
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473 1 job_task qr1hi-ot0gb-e3obm0lv6k6p56a
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473 1 child 14504 started on localhost.1
-2014-08-06_15:16:25 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 1 done, 1 running, 0 todo
-2014-08-06_15:16:26 qr1hi-8i9sb-qyrat80ef927lam 14473 1 stderr crunchstat: Running [stdbuf --output=0 --error=0 /home/$USER/tutorial/crunch_scripts/hash.py]
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473 1 child 14504 on localhost.1 exit 0 signal 0 success=true
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473 1 success in 10 seconds
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473 1 output 8c20281b9840f624a486e4f1a78a1da8+105+A234be74ceb5ea31db6e11b6be26f3eb76d288ad0@54987018
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  wait for last 0 children to finish
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  status: 2 done, 0 running, 0 todo
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  release job allocation
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  Freeze not implemented
-2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  collate
-2014-08-06_15:16:36 qr1hi-8i9sb-qyrat80ef927lam 14473  collated output manifest text to send to API server is 105 bytes with access tokens
-2014-08-06_15:16:36 qr1hi-8i9sb-qyrat80ef927lam 14473  output hash c1b44b6dc41ef334cf1136033ca950e6+54
-2014-08-06_15:16:37 qr1hi-8i9sb-qyrat80ef927lam 14473  finish
-2014-08-06_15:16:38 qr1hi-8i9sb-qyrat80ef927lam 14473  log manifest is 7fe8cf1d45d438a3ca3ac4a184b7aff4+83
-</code></pre>
-</notextile>
-
-Although the job runs locally, the output of the job has been saved to Keep, the Arvados file store.  The "output hash" line (third from the bottom) provides the portable data hash of the Arvados collection where the script's output has been saved.  Copy the output hash and use @arv-ls@ to list the contents of your output collection, and @arv-get@ to download it to the current directory:
-
-<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">arv-ls c1b44b6dc41ef334cf1136033ca950e6+54</span>
-./md5sum.txt
-~/tutorial/crunch_scripts$ <span class="userinput">arv-get c1b44b6dc41ef334cf1136033ca950e6+54/ .</span>
-0 MiB / 0 MiB 100.0%
-~/tutorial/crunch_scripts$ <span class="userinput">cat md5sum.txt</span>
-44b8ae3fde7a8a88d2f7ebd237625b4f c1bad4b39ca5a924e481008009d94e32+210/var-GS000016015-ASM.tsv.bz2
-</code></pre>
-</notextile>
-
-Running locally is convenient for development and debugging, as it permits a fast iterative development cycle.  Your job run is also recorded by Arvados, and will appear in the *Recent jobs and pipelines* panel on the "Workbench Dashboard":{{site.arvados_workbench_host}}.  This provides limited provenance, by recording the input parameters, the execution log, and the output.  However, running locally does not allow you to scale out to multiple nodes, and does not store the complete system snapshot required to achieve reproducibility; to do that you need to "submit a job to the Arvados cluster":{{site.baseurl}}/user/tutorials/tutorial-submit-job.html.
diff --git a/doc/user/tutorials/tutorial-submit-job.html.textile.liquid b/doc/user/tutorials/tutorial-submit-job.html.textile.liquid
deleted file mode 100644 (file)
index ff78aab..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
----
-layout: default
-navsection: userguide
-navmenu: Tutorials
-title: "Running on an Arvados cluster"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'pipeline_deprecation_notice' %}
-
-This tutorial demonstrates how to create a pipeline to run your crunch script on an Arvados cluster.  Cluster jobs can scale out to multiple nodes, and use @git@ and @docker@ to store the complete system snapshot required to achieve reproducibility.
-
-{% include 'tutorial_expectations' %}
-
-This tutorial uses @$USER@ to denote your username.  Replace @$USER@ with your user name in all the following examples.
-
-Also, this tutorial uses the @tutorial@ Arvados repository created in "Adding a new arvados repository":add-new-repository.html as the example repository.
-
-h2. Clone Arvados repository
-
-Please clone the *tutorial* repository using the instructions from "Working with Arvados git repository":git-arvados-guide.html, if you have not already done so.
-
-h2. Creating a Crunch script
-
-Start by entering the @tutorial@ directory created by @git clone@. Next, create a subdirectory called @crunch_scripts@ and change to that directory:
-
-<notextile>
-<pre><code>>~$ <span class="userinput">cd tutorial</span>
-~/tutorial$ <span class="userinput">mkdir crunch_scripts</span>
-~/tutorial$ <span class="userinput">cd crunch_scripts</span></code></pre>
-</notextile>
-
-Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory.
-
-notextile. <pre>~/tutorial/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
-
-Add the following code to compute the MD5 hash of each file in a collection (if you already completed "Writing a Crunch script":tutorial-firstscript.html, you can just copy the @hash.py@ file you created previously.)
-
-<notextile> {% code 'tutorial_hash_script_py' as python %} </notextile>
-
-Make the file executable:
-
-notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
-
-Next, add the file to the staging area.  This tells @git@ that the file should be included on the next commit.
-
-notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git add hash.py</span></code></pre>
-
-Next, commit your changes.  All staged changes are recorded into the local git repository:
-
-<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git commit -m "my first script"</span>
-[master (root-commit) 27fd88b] my first script
- 1 file changed, 45 insertions(+)
- create mode 100755 crunch_scripts/hash.py</code></pre>
-</notextile>
-
-Finally, upload your changes to the Arvados server:
-
-<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git push origin master</span>
-Counting objects: 4, done.
-Compressing objects: 100% (2/2), done.
-Writing objects: 100% (4/4), 682 bytes, done.
-Total 4 (delta 0), reused 0 (delta 0)
-To git@git.qr1hi.arvadosapi.com:$USER/tutorial.git
- * [new branch]      master -> master</code></pre>
-</notextile>
-
-h2. Create a pipeline template
-
-Next, create a new template using @arv create pipeline_template@:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv create pipeline_template</span></code></pre>
-</notextile>
-
-In the editor, enter the following template:
-
-<notextile> {% code 'tutorial_submit_job' as javascript %} </notextile>
-
-* @"repository"@ is the name of a git repository to search for the script version.  You can access a list of available git repositories on the Arvados Workbench in the *Repositories* page using the <span class="fa fa-lg fa-user"></span> <span class="caret"></span> top navigation menu icon.
-* @"script_version"@ specifies the version of the script that you wish to run.  This can be in the form of an explicit Git revision hash, a tag, or a branch (in which case it will use the HEAD of the specified branch).  Arvados logs the script version that was used in the run, enabling you to go back and re-run any past job with the guarantee that the exact same code will be used as was used in the previous run.
-* @"script"@ specifies the filename of the script to run.  Crunch expects to find this in the @crunch_scripts/@ subdirectory of the Git repository.
-* @"runtime_constraints"@ describes the runtime environment required to run the job.  These are described in the "job record schema":{{site.baseurl}}/api/methods/jobs.html
-
-h2. Running your pipeline
-
-Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":{{site.arvados_workbench_host}}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-workflow-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
-
-For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/methods/pipeline_templates.html
index 35933f99fc2afafdb970cc2127587f182364f094..a9841ca9f53f4fc247b2ae73bf416eddf52b8249 100644 (file)
@@ -22,7 +22,6 @@ var (
        Copy = externalCmd{"arv-copy"}
        Tag  = externalCmd{"arv-tag"}
        Ws   = externalCmd{"arv-ws"}
-       Run  = externalCmd{"arv-run"}
 
        Keep = cmd.Multi(map[string]cmd.Handler{
                "get":       externalCmd{"arv-get"},
@@ -31,9 +30,6 @@ var (
                "normalize": externalCmd{"arv-normalize"},
                "docker":    externalCmd{"arv-keepdocker"},
        })
-       Pipeline = cmd.Multi(map[string]cmd.Handler{
-               "run": externalCmd{"arv-run-pipeline-instance"},
-       })
        // user, group, container, specimen, etc.
        APICall = apiCallCmd{}
 )
@@ -94,7 +90,7 @@ func (ec externalCmd) RunCommand(prog string, args []string, stdin io.Reader, st
                return 1
        case *exec.Error:
                fmt.Fprintln(stderr, err)
-               if ec.prog == "arv" || ec.prog == "arv-run-pipeline-instance" {
+               if ec.prog == "arv" {
                        fmt.Fprint(stderr, rubyInstallHints)
                } else if strings.HasPrefix(ec.prog, "arv-") {
                        fmt.Fprint(stderr, pythonInstallHints)
index 6da5344fa3a55920894def50df20acc68d4bcd02..163cd87ec5107ee826e1b971eac94fc4f8b1892d 100644 (file)
@@ -215,8 +215,8 @@ Clusters:
       # to run an open instance where anyone can create an account and use
       # the system without requiring manual approval.
       #
-      # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-      # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+      # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+      # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
       AutoSetupNewUsers: false
       AutoSetupNewUsersWithVmUUID: ""
       AutoSetupNewUsersWithRepository: false
@@ -229,7 +229,7 @@ Clusters:
         syslog: {}
         SAMPLE: {}
 
-      # When new_users_are_active is set to true, new users will be active
+      # When NewUsersAreActive is set to true, new users will be active
       # immediately.  This skips the "self-activate" step which enforces
       # user agreements.  Should only be enabled for development.
       NewUsersAreActive: false
@@ -241,7 +241,7 @@ Clusters:
       # should be an address associated with a Google account.
       AutoAdminUserWithEmail: ""
 
-      # If auto_admin_first_user is set to true, the first user to log in when no
+      # If AutoAdminFirstUser is set to true, the first user to log in when no
       # other admin users exist will automatically become an admin user.
       AutoAdminFirstUser: false
 
@@ -254,7 +254,7 @@ Clusters:
       NewUserNotificationRecipients: {}
       NewInactiveUserNotificationRecipients: {}
 
-      # Set anonymous_user_token to enable anonymous user access. You can get
+      # Set AnonymousUserToken to enable anonymous user access. You can get
       # the token by running "bundle exec ./script/get_anonymous_user_token.rb"
       # in the directory where your API server is running.
       AnonymousUserToken: ""
@@ -270,7 +270,7 @@ Clusters:
 
       # Maximum number of log rows to delete in a single SQL transaction.
       #
-      # If max_audit_log_delete_batch is 0, log entries will never be
+      # If MaxDeleteBatch is 0, log entries will never be
       # deleted by Arvados. Cleanup can be done by an external process
       # without affecting any Arvados system processes, as long as very
       # recent (<5 minutes old) logs are not deleted.
@@ -319,7 +319,7 @@ Clusters:
       # identical to the permission key given to Keep. IMPORTANT: This is
       # a site secret. It should be at least 50 characters.
       #
-      # Modifying blob_signing_key will invalidate all existing
+      # Modifying BlobSigningKey will invalidate all existing
       # signatures, which can cause programs to fail (e.g., arv-put,
       # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
       # no such processes are running.
@@ -341,14 +341,14 @@ Clusters:
       # keepstore servers.  Otherwise, reading data blocks and saving
       # collections will fail with HTTP 403 permission errors.
       #
-      # Modifying blob_signature_ttl invalidates existing signatures; see
-      # blob_signing_key note above.
+      # Modifying BlobSigningTTL invalidates existing signatures; see
+      # BlobSigningKey note above.
       #
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
       # Default lifetime for ephemeral collections: 2 weeks. This must not
-      # be less than blob_signature_ttl.
+      # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
 
       # Interval (seconds) between trash sweeps. During a trash sweep,
@@ -358,7 +358,7 @@ Clusters:
 
       # If true, enable collection versioning.
       # When a collection's preserve_version field is true or the current version
-      # is older than the amount of seconds defined on preserve_version_if_idle,
+      # is older than the amount of seconds defined on PreserveVersionIfIdle,
       # a snapshot of the collection's previous state is created and linked to
       # the current collection.
       CollectionVersioning: false
@@ -395,6 +395,23 @@ Clusters:
       # The default setting (false) is appropriate for a multi-user site.
       TrustAllContent: false
 
+      # Cache parameters for WebDAV content serving:
+      # * TTL: Maximum time to cache manifests and permission checks.
+      # * UUIDTTL: Maximum time to cache collection state.
+      # * MaxBlockEntries: Maximum number of block cache entries.
+      # * MaxCollectionEntries: Maximum number of collection cache entries.
+      # * MaxCollectionBytes: Approximate memory limit for collection cache.
+      # * MaxPermissionEntries: Maximum number of permission cache entries.
+      # * MaxUUIDEntries: Maximum number of UUID cache entries.
+      WebDAVCache:
+        TTL: 300s
+        UUIDTTL: 5s
+        MaxBlockEntries:      4
+        MaxCollectionEntries: 1000
+        MaxCollectionBytes:   100000000
+        MaxPermissionEntries: 1000
+        MaxUUIDEntries:       1000
+
     Login:
       # These settings are provided by your OAuth2 provider (e.g.,
       # sso-provider).
@@ -573,7 +590,11 @@ Clusters:
           AssignNodeHostname: "compute%<slot_number>d"
 
       JobsAPI:
-        # Enable the legacy Jobs API.  This value must be a string.
+        # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
+        #
+        # Note: this only enables read-only access; creating new
+        # legacy jobs and pipelines is not supported.
+        #
         # 'auto' -- (default) enable the Jobs API only if it has been used before
         #         (i.e., there are job records in the database)
         # 'true' -- enable the Jobs API despite lack of existing records.
@@ -586,30 +607,6 @@ Clusters:
         # {git_repositories_dir}/arvados/.git
         GitInternalDir: /var/lib/arvados/internal.git
 
-        # Docker image to be used when none found in runtime_constraints of a job
-        DefaultDockerImage: ""
-
-        # none or slurm_immediate
-        CrunchJobWrapper: none
-
-        # username, or false = do not set uid when running jobs.
-        CrunchJobUser: crunch
-
-        # The web service must be able to create/write this file, and
-        # crunch-job must be able to stat() it.
-        CrunchRefreshTrigger: /tmp/crunch_refresh_trigger
-
-        # Control job reuse behavior when two completed jobs match the
-        # search criteria and have different outputs.
-        #
-        # If true, in case of a conflict, reuse the earliest job (this is
-        # similar to container reuse behavior).
-        #
-        # If false, in case of a conflict, do not reuse any completed job,
-        # but do reuse an already-running job if available (this is the
-        # original job reuse behavior, and is still the default).
-        ReuseJobIfOutputsDiffer: false
-
       CloudVMs:
         # Enable the cloud scheduler (experimental).
         Enable: false
index 4e7b85ec5d9c43644ddfc0e451ec9703df70dce3..df872111db2adf8ff1bc504fd0a5b2f31f2b14a6 100644 (file)
@@ -398,3 +398,73 @@ func (ldr *Loader) loadOldKeepproxyConfig(cfg *arvados.Config) error {
        cfg.Clusters[cluster.ClusterID] = *cluster
        return nil
 }
+
+const defaultKeepWebConfigPath = "/etc/arvados/keep-web/keep-web.yml"
+
+type oldKeepWebConfig struct {
+       Client *arvados.Client
+
+       Listen string
+
+       AnonymousTokens    []string
+       AttachmentOnlyHost string
+       TrustAllContent    bool
+
+       Cache struct {
+               TTL                  arvados.Duration
+               UUIDTTL              arvados.Duration
+               MaxCollectionEntries int
+               MaxCollectionBytes   int64
+               MaxPermissionEntries int
+               MaxUUIDEntries       int
+       }
+
+       // Hack to support old command line flag, which is a bool
+       // meaning "get actual token from environment".
+       deprecatedAllowAnonymous bool
+
+       // Authorization token to be included in all health check requests.
+       ManagementToken string
+}
+
+func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
+       if ldr.KeepWebPath == "" {
+               return nil
+       }
+       var oc oldKeepWebConfig
+       err := ldr.loadOldConfigHelper("keep-web", ldr.KeepWebPath, &oc)
+       if os.IsNotExist(err) && ldr.KeepWebPath == defaultKeepWebConfigPath {
+               return nil
+       } else if err != nil {
+               return err
+       }
+
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return err
+       }
+
+       loadOldClientConfig(cluster, oc.Client)
+
+       cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: oc.Listen}] = arvados.ServiceInstance{}
+       cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: oc.Listen}] = arvados.ServiceInstance{}
+       cluster.Services.WebDAVDownload.ExternalURL = arvados.URL{Host: oc.AttachmentOnlyHost}
+       cluster.TLS.Insecure = oc.Client.Insecure
+       cluster.ManagementToken = oc.ManagementToken
+       cluster.Collections.TrustAllContent = oc.TrustAllContent
+       cluster.Collections.WebDAVCache.TTL = oc.Cache.TTL
+       cluster.Collections.WebDAVCache.UUIDTTL = oc.Cache.UUIDTTL
+       cluster.Collections.WebDAVCache.MaxCollectionEntries = oc.Cache.MaxCollectionEntries
+       cluster.Collections.WebDAVCache.MaxCollectionBytes = oc.Cache.MaxCollectionBytes
+       cluster.Collections.WebDAVCache.MaxPermissionEntries = oc.Cache.MaxPermissionEntries
+       cluster.Collections.WebDAVCache.MaxUUIDEntries = oc.Cache.MaxUUIDEntries
+       if len(oc.AnonymousTokens) > 0 {
+               cluster.Users.AnonymousUserToken = oc.AnonymousTokens[0]
+               if len(oc.AnonymousTokens) > 1 {
+                       ldr.Logger.Warn("More than one anonymous token configured; using only the first and discarding the rest.")
+               }
+       }
+
+       cfg.Clusters[cluster.ClusterID] = *cluster
+       return nil
+}
index 308b0cc359e92b1d79e2d60502d2069eed3d5102..8479842be9a3bf1255ff3777373b11468f7bbf8d 100644 (file)
@@ -5,8 +5,12 @@
 package config
 
 import (
+       "flag"
+       "io/ioutil"
        "os"
+       "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        check "gopkg.in/check.v1"
 )
 
@@ -51,3 +55,75 @@ Clusters:
      listen: ":9006"
 `)
 }
+
+func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
+       content := []byte(`
+{
+       "Client": {
+               "Scheme": "",
+               "APIHost": "example.com",
+               "AuthToken": "abcdefg",
+       },
+       "Listen": ":80",
+       "AnonymousTokens": [
+               "anonusertoken"
+       ],
+       "AttachmentOnlyHost": "download.example.com",
+       "TrustAllContent": true,
+       "Cache": {
+               "TTL": "1m",
+               "UUIDTTL": "1s",
+               "MaxCollectionEntries": 42,
+               "MaxCollectionBytes": 1234567890,
+               "MaxPermissionEntries": 100,
+               "MaxUUIDEntries": 100
+       },
+       "ManagementToken": "xyzzy"
+}
+`)
+       tmpfile, err := ioutil.TempFile("", "example")
+       if err != nil {
+               c.Error(err)
+       }
+       defer os.Remove(tmpfile.Name())
+
+       if _, err := tmpfile.Write(content); err != nil {
+               c.Error(err)
+       }
+       if err := tmpfile.Close(); err != nil {
+               c.Error(err)
+       }
+       flags := flag.NewFlagSet("keep-web", flag.ExitOnError)
+       ldr := testLoader(c, "Clusters: {zzzzz: {}}", nil)
+       ldr.SetupFlags(flags)
+       args := ldr.MungeLegacyConfigArgs(ldr.Logger, []string{"-config", tmpfile.Name()}, "-legacy-keepweb-config")
+       flags.Parse(args)
+       cfg, err := ldr.Load()
+       if err != nil {
+               c.Error(err)
+       }
+       c.Check(cfg, check.NotNil)
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               c.Error(err)
+       }
+       c.Check(cluster, check.NotNil)
+
+       c.Check(cluster.Services.Controller.ExternalURL, check.Equals, arvados.URL{Scheme: "https", Host: "example.com"})
+       c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
+
+       c.Check(cluster.Collections.WebDAVCache.TTL, check.Equals, arvados.Duration(60*time.Second))
+       c.Check(cluster.Collections.WebDAVCache.UUIDTTL, check.Equals, arvados.Duration(time.Second))
+       c.Check(cluster.Collections.WebDAVCache.MaxCollectionEntries, check.Equals, 42)
+       c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
+       c.Check(cluster.Collections.WebDAVCache.MaxPermissionEntries, check.Equals, 100)
+       c.Check(cluster.Collections.WebDAVCache.MaxUUIDEntries, check.Equals, 100)
+
+       c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com"})
+       c.Check(cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
+       c.Check(cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
+
+       c.Check(cluster.Collections.TrustAllContent, check.Equals, true)
+       c.Check(cluster.Users.AnonymousUserToken, check.Equals, "anonusertoken")
+       c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+}
index 82b48b36b0fc074dc82b551985e58a7d08f883bd..6eb4fbe5f570d4abf9ffd885f1ab0d822acf7fa2 100644 (file)
@@ -90,6 +90,7 @@ var whitelist = map[string]bool{
        "Collections.PreserveVersionIfIdle":            true,
        "Collections.TrashSweepInterval":               false,
        "Collections.TrustAllContent":                  false,
+       "Collections.WebDAVCache":                      false,
        "Containers":                                   true,
        "Containers.CloudVMs":                          false,
        "Containers.CrunchRunCommand":                  false,
@@ -97,13 +98,8 @@ var whitelist = map[string]bool{
        "Containers.DefaultKeepCacheRAM":               true,
        "Containers.DispatchPrivateKey":                false,
        "Containers.JobsAPI":                           true,
-       "Containers.JobsAPI.CrunchJobUser":             false,
-       "Containers.JobsAPI.CrunchJobWrapper":          false,
-       "Containers.JobsAPI.CrunchRefreshTrigger":      false,
-       "Containers.JobsAPI.DefaultDockerImage":        false,
        "Containers.JobsAPI.Enable":                    true,
        "Containers.JobsAPI.GitInternalDir":            false,
-       "Containers.JobsAPI.ReuseJobIfOutputsDiffer":   false,
        "Containers.Logging":                           false,
        "Containers.LogReuseDecisions":                 false,
        "Containers.MaxComputeVMs":                     false,
index ffcdddfe2fce5962babde94f1a47853174b14a2c..1eae24d84e540ccc597cc0dcf8f048fadf19514e 100644 (file)
@@ -221,8 +221,8 @@ Clusters:
       # to run an open instance where anyone can create an account and use
       # the system without requiring manual approval.
       #
-      # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-      # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+      # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+      # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
       AutoSetupNewUsers: false
       AutoSetupNewUsersWithVmUUID: ""
       AutoSetupNewUsersWithRepository: false
@@ -235,7 +235,7 @@ Clusters:
         syslog: {}
         SAMPLE: {}
 
-      # When new_users_are_active is set to true, new users will be active
+      # When NewUsersAreActive is set to true, new users will be active
       # immediately.  This skips the "self-activate" step which enforces
       # user agreements.  Should only be enabled for development.
       NewUsersAreActive: false
@@ -247,7 +247,7 @@ Clusters:
       # should be an address associated with a Google account.
       AutoAdminUserWithEmail: ""
 
-      # If auto_admin_first_user is set to true, the first user to log in when no
+      # If AutoAdminFirstUser is set to true, the first user to log in when no
       # other admin users exist will automatically become an admin user.
       AutoAdminFirstUser: false
 
@@ -260,7 +260,7 @@ Clusters:
       NewUserNotificationRecipients: {}
       NewInactiveUserNotificationRecipients: {}
 
-      # Set anonymous_user_token to enable anonymous user access. You can get
+      # Set AnonymousUserToken to enable anonymous user access. You can get
       # the token by running "bundle exec ./script/get_anonymous_user_token.rb"
       # in the directory where your API server is running.
       AnonymousUserToken: ""
@@ -276,7 +276,7 @@ Clusters:
 
       # Maximum number of log rows to delete in a single SQL transaction.
       #
-      # If max_audit_log_delete_batch is 0, log entries will never be
+      # If MaxDeleteBatch is 0, log entries will never be
       # deleted by Arvados. Cleanup can be done by an external process
       # without affecting any Arvados system processes, as long as very
       # recent (<5 minutes old) logs are not deleted.
@@ -325,7 +325,7 @@ Clusters:
       # identical to the permission key given to Keep. IMPORTANT: This is
       # a site secret. It should be at least 50 characters.
       #
-      # Modifying blob_signing_key will invalidate all existing
+      # Modifying BlobSigningKey will invalidate all existing
       # signatures, which can cause programs to fail (e.g., arv-put,
       # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
       # no such processes are running.
@@ -347,14 +347,14 @@ Clusters:
       # keepstore servers.  Otherwise, reading data blocks and saving
       # collections will fail with HTTP 403 permission errors.
       #
-      # Modifying blob_signature_ttl invalidates existing signatures; see
-      # blob_signing_key note above.
+      # Modifying BlobSigningTTL invalidates existing signatures; see
+      # BlobSigningKey note above.
       #
       # The default is 2 weeks.
       BlobSigningTTL: 336h
 
       # Default lifetime for ephemeral collections: 2 weeks. This must not
-      # be less than blob_signature_ttl.
+      # be less than BlobSigningTTL.
       DefaultTrashLifetime: 336h
 
       # Interval (seconds) between trash sweeps. During a trash sweep,
@@ -364,7 +364,7 @@ Clusters:
 
       # If true, enable collection versioning.
       # When a collection's preserve_version field is true or the current version
-      # is older than the amount of seconds defined on preserve_version_if_idle,
+      # is older than the amount of seconds defined on PreserveVersionIfIdle,
       # a snapshot of the collection's previous state is created and linked to
       # the current collection.
       CollectionVersioning: false
@@ -401,6 +401,23 @@ Clusters:
       # The default setting (false) is appropriate for a multi-user site.
       TrustAllContent: false
 
+      # Cache parameters for WebDAV content serving:
+      # * TTL: Maximum time to cache manifests and permission checks.
+      # * UUIDTTL: Maximum time to cache collection state.
+      # * MaxBlockEntries: Maximum number of block cache entries.
+      # * MaxCollectionEntries: Maximum number of collection cache entries.
+      # * MaxCollectionBytes: Approximate memory limit for collection cache.
+      # * MaxPermissionEntries: Maximum number of permission cache entries.
+      # * MaxUUIDEntries: Maximum number of UUID cache entries.
+      WebDAVCache:
+        TTL: 300s
+        UUIDTTL: 5s
+        MaxBlockEntries:      4
+        MaxCollectionEntries: 1000
+        MaxCollectionBytes:   100000000
+        MaxPermissionEntries: 1000
+        MaxUUIDEntries:       1000
+
     Login:
       # These settings are provided by your OAuth2 provider (e.g.,
       # sso-provider).
@@ -579,7 +596,11 @@ Clusters:
           AssignNodeHostname: "compute%<slot_number>d"
 
       JobsAPI:
-        # Enable the legacy Jobs API.  This value must be a string.
+        # Enable the legacy 'jobs' API (crunch v1).  This value must be a string.
+        #
+        # Note: this only enables read-only access; creating new
+        # legacy jobs and pipelines is not supported.
+        #
         # 'auto' -- (default) enable the Jobs API only if it has been used before
         #         (i.e., there are job records in the database)
         # 'true' -- enable the Jobs API despite lack of existing records.
@@ -592,30 +613,6 @@ Clusters:
         # {git_repositories_dir}/arvados/.git
         GitInternalDir: /var/lib/arvados/internal.git
 
-        # Docker image to be used when none found in runtime_constraints of a job
-        DefaultDockerImage: ""
-
-        # none or slurm_immediate
-        CrunchJobWrapper: none
-
-        # username, or false = do not set uid when running jobs.
-        CrunchJobUser: crunch
-
-        # The web service must be able to create/write this file, and
-        # crunch-job must be able to stat() it.
-        CrunchRefreshTrigger: /tmp/crunch_refresh_trigger
-
-        # Control job reuse behavior when two completed jobs match the
-        # search criteria and have different outputs.
-        #
-        # If true, in case of a conflict, reuse the earliest job (this is
-        # similar to container reuse behavior).
-        #
-        # If false, in case of a conflict, do not reuse any completed job,
-        # but do reuse an already-running job if available (this is the
-        # original job reuse behavior, and is still the default).
-        ReuseJobIfOutputsDiffer: false
-
       CloudVMs:
         # Enable the cloud scheduler (experimental).
         Enable: false
index 309c0a615dc98620ac86895c471f60db5c50807c..c0b44c17eb3f421256ea8ca61b05ed965e45288b 100644 (file)
@@ -31,6 +31,7 @@ type Loader struct {
 
        Path                    string
        KeepstorePath           string
+       KeepWebPath             string
        CrunchDispatchSlurmPath string
        WebsocketPath           string
        KeepproxyPath           string
@@ -61,6 +62,7 @@ func NewLoader(stdin io.Reader, logger logrus.FieldLogger) *Loader {
 func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
        flagset.StringVar(&ldr.Path, "config", arvados.DefaultConfigFile, "Site configuration `file` (default may be overridden by setting an ARVADOS_CONFIG environment variable)")
        flagset.StringVar(&ldr.KeepstorePath, "legacy-keepstore-config", defaultKeepstoreConfigPath, "Legacy keepstore configuration `file`")
+       flagset.StringVar(&ldr.KeepWebPath, "legacy-keepweb-config", defaultKeepWebConfigPath, "Legacy keep-web configuration `file`")
        flagset.StringVar(&ldr.CrunchDispatchSlurmPath, "legacy-crunch-dispatch-slurm-config", defaultCrunchDispatchSlurmConfigPath, "Legacy crunch-dispatch-slurm configuration `file`")
        flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
        flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
@@ -135,6 +137,9 @@ func (ldr *Loader) MungeLegacyConfigArgs(lgr logrus.FieldLogger, args []string,
        if legacyConfigArg != "-legacy-ws-config" {
                ldr.WebsocketPath = ""
        }
+       if legacyConfigArg != "-legacy-keepweb-config" {
+               ldr.KeepWebPath = ""
+       }
        if legacyConfigArg != "-legacy-keepproxy-config" {
                ldr.WebsocketPath = ""
        }
@@ -235,6 +240,7 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                // legacy config file for the current component
                for _, err := range []error{
                        ldr.loadOldKeepstoreConfig(&cfg),
+                       ldr.loadOldKeepWebConfig(&cfg),
                        ldr.loadOldCrunchDispatchSlurmConfig(&cfg),
                        ldr.loadOldWebsocketConfig(&cfg),
                        ldr.loadOldKeepproxyConfig(&cfg),
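
The hunks above add a -legacy-keepweb-config flag and hook loadOldKeepWebConfig into the loader. Below is a self-contained sketch (not the actual MungeLegacyConfigArgs) of the pattern those hunks follow: when a service is started with one specific legacy-config flag, the legacy paths for every other component are cleared so only that component's old file is consulted. Paths are examples only.

package main

import "fmt"

func keepOnlyRequestedLegacyPath(paths map[string]string, requestedFlag string) {
	for flag := range paths {
		if flag != requestedFlag {
			paths[flag] = "" // ignore legacy files belonging to other components
		}
	}
}

func main() {
	paths := map[string]string{
		"-legacy-keepweb-config":   "/etc/arvados/keep-web/keep-web.yml",
		"-legacy-keepproxy-config": "/etc/arvados/keepproxy/keepproxy.yml",
	}
	keepOnlyRequestedLegacyPath(paths, "-legacy-keepweb-config")
	fmt.Println(paths["-legacy-keepproxy-config"] == "") // true: only keep-web's legacy path remains
}
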
index 07daf2f90ef28b3199e856c93134aa5b6975fab3..b7b1e64483f926c2da706541f233482fafeae384 100644 (file)
@@ -232,6 +232,10 @@ func fetchRemoteCollectionByPDH(
                        // No need to query local cluster again
                        continue
                }
+               if remoteID == "*" {
+                       // This isn't a real remote cluster: it just sets defaults for unlisted remotes.
+                       continue
+               }
 
                wg.Add(1)
                go func(remote string) {
@@ -293,10 +297,8 @@ func fetchRemoteCollectionByPDH(
                        var errors []string
                        for len(errorChan) > 0 {
                                err := <-errorChan
-                               if httperr, ok := err.(HTTPError); ok {
-                                       if httperr.Code != http.StatusNotFound {
-                                               errorCode = http.StatusBadGateway
-                                       }
+                               if httperr, ok := err.(HTTPError); !ok || httperr.Code != http.StatusNotFound {
+                                       errorCode = http.StatusBadGateway
                                }
                                errors = append(errors, err.Error())
                        }
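
The federation hunk above changes how per-remote errors are folded into one HTTP status. A minimal, self-contained restatement of that rule (not the production code): a fan-out lookup reports 404 only when every remote error was a 404; any other failure escalates the aggregate response to 502 Bad Gateway.

package main

import (
	"fmt"
	"net/http"
)

type remoteErr struct {
	code int // HTTP status reported by one remote cluster
	msg  string
}

func aggregateStatus(errs []remoteErr) int {
	status := http.StatusNotFound
	for _, e := range errs {
		if e.code != http.StatusNotFound {
			status = http.StatusBadGateway
		}
	}
	return status
}

func main() {
	fmt.Println(aggregateStatus([]remoteErr{{404, "not found"}, {500, "boom"}})) // 502
	fmt.Println(aggregateStatus([]remoteErr{{404, "not found"}}))                // 404
}
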
index 63e801b22c7a2bebd5a6a35fdefa4835c1775c1d..3bcafacd2c8bc47bdce87bb6908169c710662c6d 100644 (file)
@@ -142,8 +142,7 @@ func (conn *Conn) tryLocalThenRemotes(ctx context.Context, fn func(context.Conte
        if all404 {
                return notFoundError{}
        }
-       // FIXME: choose appropriate HTTP status
-       return fmt.Errorf("errors: %v", errs)
+       return httpErrorf(http.StatusBadGateway, "errors: %v", errs)
 }
 
 func (conn *Conn) CollectionCreate(ctx context.Context, options arvados.CreateOptions) (arvados.Collection, error) {
@@ -206,8 +205,9 @@ func (conn *Conn) CollectionGet(ctx context.Context, options arvados.GetOptions)
                        // hash+size+hints; only hash+size need to
                        // match the computed PDH.
                        if pdh := portableDataHash(c.ManifestText); pdh != options.UUID && !strings.HasPrefix(options.UUID, pdh+"+") {
-                               ctxlog.FromContext(ctx).Warnf("bad portable data hash %q received from remote %q (expected %q)", pdh, remoteID, options.UUID)
-                               return notFoundError{}
+                               err = httpErrorf(http.StatusBadGateway, "bad portable data hash %q received from remote %q (expected %q)", pdh, remoteID, options.UUID)
+                               ctxlog.FromContext(ctx).Warn(err)
+                               return err
                        }
                        if remoteID != "" {
                                c.ManifestText = rewriteManifest(c.ManifestText, remoteID)
index 169b1f79614bd9b0391542ae71771dac87e0d95d..c654277fea2dff8616b948eda8ce8dbeb76c75c0 100644 (file)
@@ -83,6 +83,9 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
                        Proxy:  true,
                        Scheme: "http",
                },
+               "*": {
+                       Scheme: "https",
+               },
        }
 
        c.Assert(s.testServer.Start(), check.IsNil)
@@ -467,6 +470,10 @@ func (s *FederationSuite) TestGetRemoteCollectionByPDH(c *check.C) {
 func (s *FederationSuite) TestGetCollectionByPDHError(c *check.C) {
        defer s.localServiceReturns404(c).Close()
 
+       // zmock's normal response (200 with an empty body) would
+       // change the outcome from 404 to 502
+       delete(s.testHandler.Cluster.RemoteClusters, "zmock")
+
        req := httptest.NewRequest("GET", "/arvados/v1/collections/99999999999999999999999999999999+99", nil)
        req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
 
@@ -479,6 +486,10 @@ func (s *FederationSuite) TestGetCollectionByPDHError(c *check.C) {
 func (s *FederationSuite) TestGetCollectionByPDHErrorBadHash(c *check.C) {
        defer s.localServiceReturns404(c).Close()
 
+       // zmock's normal response (200 with an empty body) would
+       // change the outcome
+       delete(s.testHandler.Cluster.RemoteClusters, "zmock")
+
        srv2 := &httpserver.Server{
                Server: http.Server{
                        Handler: http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
@@ -512,7 +523,7 @@ func (s *FederationSuite) TestGetCollectionByPDHErrorBadHash(c *check.C) {
        resp := s.testRequest(req).Result()
        defer resp.Body.Close()
 
-       c.Check(resp.StatusCode, check.Equals, http.StatusNotFound)
+       c.Check(resp.StatusCode, check.Equals, http.StatusBadGateway)
 }
 
 func (s *FederationSuite) TestSaltedTokenGetCollectionByPDH(c *check.C) {
@@ -534,6 +545,10 @@ func (s *FederationSuite) TestSaltedTokenGetCollectionByPDH(c *check.C) {
 func (s *FederationSuite) TestSaltedTokenGetCollectionByPDHError(c *check.C) {
        arvadostest.SetServiceURL(&s.testHandler.Cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
 
+       // zmock's normal response (200 with an empty body) would
+       // change the outcome
+       delete(s.testHandler.Cluster.RemoteClusters, "zmock")
+
        req := httptest.NewRequest("GET", "/arvados/v1/collections/99999999999999999999999999999999+99", nil)
        req.Header.Set("Authorization", "Bearer v2/zzzzz-gj3su-077z32aux8dg2s1/282d7d172b6cfdce364c5ed12ddf7417b2d00065")
        resp := s.testRequest(req).Result()
index 60aeb1892b11e13c980c8c2a97e10da9fbc7a639..bce7ce5f5cb7a55a083f68bc29d6a88008a03fef 100644 (file)
@@ -22,12 +22,8 @@ Gem::Specification.new do |s|
   s.email       = 'gem-dev@curoverse.com'
   #s.bindir      = '.'
   s.licenses    = ['Apache-2.0']
-  s.files       = ["bin/arv", "bin/arv-run-pipeline-instance",
-                   "bin/arv-crunch-job", "bin/arv-tag", "bin/crunch-job",
-                   "LICENSE-2.0.txt"]
+  s.files       = ["bin/arv", "bin/arv-tag", "LICENSE-2.0.txt"]
   s.executables << "arv"
-  s.executables << "arv-run-pipeline-instance"
-  s.executables << "arv-crunch-job"
   s.executables << "arv-tag"
   s.required_ruby_version = '>= 2.1.0'
   s.add_runtime_dependency 'arvados', '~> 1.3.0', '>= 1.3.0'
index 7110b4b991ab41da1f77fc256ed67b7eadd3daef..25c28bea23c2e62af009cf14c4da19343195f285 100755 (executable)
@@ -131,16 +131,6 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
       puts "Available methods: ls, get, put, docker"
     end
     abort
-  when 'pipeline'
-    sub = remaining_opts.shift
-    if sub == 'run'
-      exec_bin "arv-run-pipeline-instance", remaining_opts
-    else
-      puts "Usage: arv pipeline [method] [--parameters]\n"
-      puts "Use 'arv pipeline [method] --help' to get more information about specific methods.\n\n"
-      puts "Available methods: run"
-    end
-    abort
   end
 end
 
diff --git a/sdk/cli/bin/arv-crunch-job b/sdk/cli/bin/arv-crunch-job
deleted file mode 100755 (executable)
index 6e4b5e0..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-exec File.join(File.dirname(File.realpath(__FILE__)), 'crunch-job'), *ARGV
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
deleted file mode 100755 (executable)
index 336b1a2..0000000
+++ /dev/null
@@ -1,781 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-class WhRunPipelineInstance
-end
-
-if RUBY_VERSION < '1.9.3' then
-  abort <<-EOS
-#{$0.gsub(/^\.\//,'')} requires Ruby version 1.9.3 or higher.
-  EOS
-end
-
-begin
-  require 'arvados'
-  require 'rubygems'
-  require 'json'
-  require 'pp'
-  require 'optimist'
-  require 'google/api_client'
-rescue LoadError => l
-  $stderr.puts $:
-  abort <<-EOS
-#{$0}: fatal: #{l.message}
-Some runtime dependencies may be missing.
-Try: gem install arvados pp google-api-client json optimist
-  EOS
-end
-
-def debuglog(message, verbosity=1)
-  $stderr.puts "#{File.split($0).last} #{$$}: #{message}" if $debuglevel >= verbosity
-end
-
-# Parse command line options (the kind that control the behavior of
-# this program, that is, not the pipeline component parameters).
-
-p = Optimist::Parser.new do
-  version __FILE__
-  banner(<<EOF)
-
-Usage:
-  arv-run-pipeline-instance --template TEMPLATE_UUID [options] [--] [parameters]
-  arv-run-pipeline-instance --instance INSTANCE_UUID [options] [--] [parameters]
-
-Parameters:
-  param_name=param_value
-  param_name param_value
-                         Set (or override) the default value for every
-                         pipeline component parameter with the given
-                         name.
-
-  component_name::param_name=param_value
-  component_name::param_name param_value
-  --component_name::param_name=param_value
-  --component_name::param_name param_value
-                         Set the value of a parameter for a single
-                         pipeline component.
-
-Options:
-EOF
-  opt(:dry_run,
-      "Do not start any new jobs or wait for existing jobs to finish. Just find out whether jobs are finished, queued, or running for each component.",
-      :type => :boolean,
-      :short => :n)
-  opt(:status_text,
-      "Store plain text status in given file.",
-      :short => :none,
-      :type => :string,
-      :default => '/dev/stdout')
-  opt(:status_json,
-      "Store json-formatted pipeline in given file.",
-      :short => :none,
-      :type => :string,
-      :default => '/dev/null')
-  opt(:no_wait,
-      "Do not wait for jobs to finish. Just look up status, submit new jobs if needed, and exit.",
-      :short => :none,
-      :type => :boolean)
-  opt(:no_reuse,
-      "Do not reuse existing jobs to satisfy pipeline components. Submit a new job for every component.",
-      :short => :none,
-      :type => :boolean)
-  opt(:debug,
-      "Print extra debugging information on stderr.",
-      :type => :boolean)
-  opt(:debug_level,
-      "Set debug verbosity level.",
-      :short => :none,
-      :type => :integer)
-  opt(:template,
-      "UUID of pipeline template, or path to local pipeline template file.",
-      :short => :none,
-      :type => :string)
-  opt(:instance,
-      "UUID of pipeline instance.",
-      :short => :none,
-      :type => :string)
-  opt(:submit,
-      "Submit the pipeline instance to the server, and exit. Let the Crunch dispatch service satisfy the components by finding/running jobs.",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_pipeline_here,
-      "Manage the pipeline instance in-process. Submit jobs to Crunch as needed. Do not exit until the pipeline finishes (or fails).",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_jobs_here,
-      "Run jobs in the local terminal session instead of submitting them to Crunch. Implies --run-pipeline-here. Note: this results in a significantly different job execution environment, and some Crunch features are not supported. It can be necessary to modify a pipeline in order to make it run this way.",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_here,
-      "Synonym for --run-jobs-here.",
-      :short => :none,
-      :type => :boolean)
-  opt(:description,
-      "Description for the pipeline instance.",
-      :short => :none,
-      :type => :string)
-  opt(:project_uuid,
-      "UUID of the project for the pipeline instance.",
-      short: :none,
-      type: :string)
-  stop_on [:'--']
-end
-$options = Optimist::with_standard_exception_handling p do
-  p.parse ARGV
-end
-$debuglevel = $options[:debug_level] || ($options[:debug] && 1) || 0
-
-$options[:run_jobs_here] ||= $options[:run_here] # old flag name
-$options[:run_pipeline_here] ||= $options[:run_jobs_here] # B requires A
-
-if $options[:instance]
-  if $options[:template] or $options[:submit]
-    abort "#{$0}: syntax error: --instance cannot be combined with --template or --submit."
-  end
-elsif not $options[:template]
-  $stderr.puts "error: you must supply a --template or --instance."
-  p.educate
-  abort
-end
-
-if $options[:run_pipeline_here] == $options[:submit]
-  abort "#{$0}: error: you must supply --run-pipeline-here, --run-jobs-here, or --submit."
-end
-
-# Set up the API client.
-
-$arv = Arvados.new api_version: 'v1'
-$client = $arv.client
-$arvados = $arv.arvados_api
-
-class PipelineInstance
-  def self.find(uuid)
-    result = $client.execute(:api_method => $arvados.pipeline_instances.get,
-                             :parameters => {
-                               :uuid => uuid
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      debuglog "Failed to get pipeline_instance: #{j[:errors] rescue nil}", 0
-      nil
-    else
-      debuglog "Retrieved pipeline_instance #{j[:uuid]}"
-      self.new(j)
-    end
-  end
-  def self.create(attributes)
-    result = $client.execute(:api_method => $arvados.pipeline_instances.create,
-                             :body_object => {
-                               :pipeline_instance => attributes
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nFailed to create pipeline_instance: #{j[:errors] rescue nil} #{j.inspect}"
-    end
-    debuglog "Created pipeline instance: #{j[:uuid]}"
-    self.new(j)
-  end
-  def save
-    result = $client.execute(:api_method => $arvados.pipeline_instances.update,
-                             :parameters => {
-                               :uuid => @pi[:uuid]
-                             },
-                             :body_object => {
-                               :pipeline_instance => @attributes_to_update
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      debuglog "Failed to save pipeline_instance: #{j[:errors] rescue nil}", 0
-      nil
-    else
-      @attributes_to_update = {}
-      @pi = j
-    end
-  end
-  def []=(x,y)
-    @attributes_to_update[x] = y
-    @pi[x] = y
-  end
-  def [](x)
-    @pi[x]
-  end
-
-  def log_stderr(msg)
-    $arv.log.create log: {
-      event_type: 'stderr',
-      object_uuid: self[:uuid],
-      owner_uuid: self[:owner_uuid],
-      properties: {"text" => msg},
-    }
-  end
-
-  protected
-  def initialize(j)
-    @attributes_to_update = {}
-    @pi = j
-  end
-end
-
-class JobCache
-  def self.get(uuid)
-    @cache ||= {}
-    result = $client.execute(:api_method => $arvados.jobs.get,
-                             :parameters => {
-                               :uuid => uuid
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    @cache[uuid] = JSON.parse result.body, :symbolize_names => true
-  end
-  def self.where(conditions)
-    result = $client.execute(:api_method => $arvados.jobs.list,
-                             :parameters => {
-                               :limit => 10000,
-                               :where => conditions.to_json
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    list = JSON.parse result.body, :symbolize_names => true
-    if list and list[:items].is_a? Array
-      list[:items]
-    else
-      []
-    end
-  end
-
-  # create() returns [job, exception]. If both job and exception are
-  # nil, there was a non-retryable error and the call should not be
-  # attempted again.
-  def self.create(pipeline, component, job, create_params)
-    @cache ||= {}
-
-    body = {job: no_nil_values(job)}.merge(no_nil_values(create_params))
-
-    result = nil
-    begin
-      result = $client.execute(
-        :api_method => $arvados.jobs.create,
-        :body_object => body,
-        :authenticated => false,
-        :headers => {
-          authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-        })
-      if result.status == 429 || result.status >= 500
-        raise Exception.new("HTTP status #{result.status}")
-      end
-    rescue Exception => e
-      return nil, e
-    end
-    j = JSON.parse(result.body, :symbolize_names => true) rescue nil
-    if result.status == 200 && j.is_a?(Hash) && j[:uuid]
-      @cache[j[:uuid]] = j
-      return j, nil
-    else
-      errors = j[:errors] rescue []
-      debuglog "create job: [#{result.status}] #{errors.inspect} with attributes #{body}", 0
-
-      msg = ""
-      errors.each do |err|
-        msg += "Error creating job for component #{component}: #{err}\n"
-      end
-      msg += "Job submission was: #{body.to_json}"
-
-      pipeline.log_stderr(msg)
-      return nil, nil
-    end
-  end
-
-  protected
-
-  def self.no_nil_values(hash)
-    hash.reject { |key, value| value.nil? }
-  end
-end
-
-class WhRunPipelineInstance
-  attr_reader :instance
-
-  def initialize(_options)
-    @options = _options
-  end
-
-  def fetch_template(template)
-    if template.match /[^-0-9a-z]/
-      # Doesn't look like a uuid -- use it as a filename.
-      @template = JSON.parse File.read(template), :symbolize_names => true
-    else
-      result = $client.execute(:api_method => $arvados.pipeline_templates.get,
-                               :parameters => {
-                                 :uuid => template
-                               },
-                               :authenticated => false,
-                               :headers => {
-                                 authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                               })
-      @template = JSON.parse result.body, :symbolize_names => true
-      if !@template[:uuid]
-        abort "#{$0}: fatal: failed to retrieve pipeline template #{template} #{@template[:errors].inspect rescue nil}"
-      end
-    end
-    self
-  end
-
-  def fetch_instance(instance_uuid)
-    @instance = PipelineInstance.find(instance_uuid)
-    @template = @instance
-    self
-  end
-
-  def apply_parameters(params_args)
-    params_args.shift if params_args[0] == '--'
-    params = {}
-    while !params_args.empty?
-      if (re = params_args[0].match /^(--)?([^-].*?)=(.+)/)
-        params[re[2]] = re[3]
-        params_args.shift
-      elsif params_args.size > 1
-        param = params_args.shift.sub /^--/, ''
-        params[param] = params_args.shift
-      else
-        abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: I do not know what to do with arg \"#{params_args[0]}\""
-      end
-    end
-
-    if not @template[:components].is_a?(Hash)
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Template missing \"components\" hash"
-    end
-    @components = @template[:components].dup
-
-    bad_components = @components.each_pair.select do |cname, cspec|
-      not cspec.is_a?(Hash)
-    end
-    if bad_components.any?
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Components not specified with hashes: #{bad_components.map(&:first).join(', ')}"
-    end
-
-    bad_components = @components.each_pair.select do |cname, cspec|
-      not cspec[:script_parameters].is_a?(Hash)
-    end
-    if bad_components.any?
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Components missing \"script_parameters\" hashes: #{bad_components.map(&:first).join(', ')}"
-    end
-
-    errors = []
-    @components.each do |componentname, component|
-      component[:script_parameters].each do |parametername, parameter|
-        parameter = { :value => parameter } unless parameter.is_a? Hash
-        if params.has_key?("#{componentname}::#{parametername}")
-          value = params["#{componentname}::#{parametername}"]
-        elsif parameter.has_key?(:value)
-          value = parameter[:value]
-        elsif parameter.has_key?(:output_of)
-          if !@components[parameter[:output_of].intern]
-            errors << [componentname, parametername, "output_of refers to nonexistent component '#{parameter[:output_of]}'"]
-          else
-            # value will be filled in later when the upstream
-            # component's output becomes known
-          end
-          next
-        elsif params.has_key?(parametername.to_s)
-          value = params[parametername.to_s]
-        elsif parameter.has_key?(:default)
-          value = parameter[:default]
-        elsif [false, 'false', 0, '0'].index(parameter[:required])
-          value = nil
-        else
-          errors << [componentname, parametername, "required parameter is missing"]
-          next
-        end
-        debuglog "parameter #{componentname}::#{parametername} == #{value}"
-
-        component[:script_parameters][parametername] =
-          parameter.dup.merge(value: value)
-      end
-    end
-    if !errors.empty?
-      all_errors = errors.collect do |c,p,e|
-        "#{c}::#{p} - #{e}\n"
-      end.join("")
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{all_errors}"
-    end
-    debuglog "options=" + @options.pretty_inspect
-    self
-  end
-
-  def setup_instance
-    if @instance
-      @instance[:properties][:run_options] ||= {}
-      if @options[:no_reuse]
-        # override properties of existing instance
-        @instance[:properties][:run_options][:enable_job_reuse] = false
-      else
-        # Default to "enable reuse" if not specified. (This code path
-        # can go away when old clients go away.)
-        if @instance[:properties][:run_options][:enable_job_reuse].nil?
-          @instance[:properties][:run_options][:enable_job_reuse] = true
-        end
-      end
-    else
-      description = $options[:description] ||
-                    ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : ""))
-      instance_body = {
-        components: @components,
-        properties: {
-          run_options: {
-            enable_job_reuse: !@options[:no_reuse]
-          }
-        },
-        pipeline_template_uuid: @template[:uuid],
-        description: description,
-        state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')
-      }
-      if @options[:project_uuid]
-        instance_body[:owner_uuid] = @options[:project_uuid]
-      end
-      @instance = PipelineInstance.create(instance_body)
-    end
-    self
-  end
-
-  def run
-    moretodo = true
-    interrupted = false
-
-    if @instance[:started_at].nil?
-      @instance[:started_at] = Time.now
-    end
-
-    job_creation_failed = 0
-    while moretodo
-      moretodo = false
-      @components.each do |cname, c|
-        job = nil
-        owner_uuid = @instance[:owner_uuid]
-        # Is the job satisfying this component already known to be
-        # finished? (Already meaning "before we query API server about
-        # the job's current state")
-        c_already_finished = (c[:job] &&
-                              c[:job][:uuid] &&
-                              ["Complete", "Failed", "Cancelled"].include?(c[:job][:state]))
-        if !c[:job] and
-            c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty?
-          # No job yet associated with this component, and its component inputs
-          # are fully specified (any output_of script_parameters are resolved
-          # to real values)
-          my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}"
-          job, err = JobCache.create(@instance, cname, {
-            :script => c[:script],
-            :script_parameters => Hash[c[:script_parameters].map do |key, spec|
-                                         [key, spec[:value]]
-                                       end],
-            :script_version => c[:script_version],
-            :repository => c[:repository],
-            :nondeterministic => c[:nondeterministic],
-            :runtime_constraints => c[:runtime_constraints],
-            :owner_uuid => owner_uuid,
-            :is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil),
-            :submit_id => my_submit_id,
-            :state => (if @options[:run_jobs_here] then "Running" else "Queued" end)
-          }, {
-            # This is the right place to put these attributes when
-            # dealing with new API servers.
-            :minimum_script_version => c[:minimum_script_version],
-            :exclude_script_versions => c[:exclude_minimum_script_versions],
-            :find_or_create => (@instance[:properties][:run_options].andand[:enable_job_reuse] &&
-                                !c[:nondeterministic]),
-            :filters => c[:filters]
-          })
-          if job
-            debuglog "component #{cname} new job #{job[:uuid]}"
-            c[:job] = job
-            c[:run_in_process] = (@options[:run_jobs_here] and
-                                  job[:submit_id] == my_submit_id)
-          elsif err.nil?
-            debuglog "component #{cname} new job failed", 0
-            job_creation_failed += 1
-          else
-            debuglog "component #{cname} new job failed, err=#{err}", 0
-          end
-        end
-
-        if c[:job] and c[:run_in_process] and not ["Complete", "Failed", "Cancelled"].include? c[:job][:state]
-          report_status
-          begin
-            require 'open3'
-            Open3.popen3("arv-crunch-job", "--force-unlock",
-                         "--job", c[:job][:uuid]) do |stdin, stdout, stderr, wait_thr|
-              debuglog "arv-crunch-job pid #{wait_thr.pid} started", 0
-              stdin.close
-              while true
-                rready, wready, = IO.select([stdout, stderr], [])
-                break if !rready[0]
-                begin
-                  buf = rready[0].read_nonblock(2**20)
-                rescue EOFError
-                  break
-                end
-                (rready[0] == stdout ? $stdout : $stderr).write(buf)
-              end
-              stdout.close
-              stderr.close
-              debuglog "arv-crunch-job pid #{wait_thr.pid} exit #{wait_thr.value.to_i}", 0
-            end
-            if not $arv.job.get(uuid: c[:job][:uuid])[:finished_at]
-              raise Exception.new("arv-crunch-job did not set finished_at.")
-            end
-          rescue Exception => e
-            debuglog "Interrupted (#{e}). Failing job.", 0
-            $arv.job.update(uuid: c[:job][:uuid],
-                            job: {
-                              state: "Failed"
-                            })
-          end
-        end
-
-        if c[:job] and c[:job][:uuid]
-          if ["Running", "Queued"].include?(c[:job][:state])
-            # Job is running (or may be soon) so update copy of job record
-            c[:job] = JobCache.get(c[:job][:uuid])
-          end
-
-          if c[:job][:state] == "Complete"
-            # Populate script_parameters of other components waiting for
-            # this job
-            @components.each do |c2name, c2|
-              c2[:script_parameters].each do |pname, p|
-                if p.is_a? Hash and p[:output_of] == cname.to_s
-                  debuglog "parameter #{c2name}::#{pname} == #{c[:job][:output]}"
-                  c2[:script_parameters][pname] = {value: c[:job][:output]}
-                  moretodo = true
-                end
-              end
-            end
-            unless c_already_finished
-              # This is my first time discovering that the job
-              # succeeded. (At the top of this loop, I was still
-              # waiting for it to finish.)
-
-              if @instance[:name].andand.length.andand > 0
-                pipeline_name = @instance[:name]
-              elsif @template.andand[:name].andand.length.andand > 0
-                pipeline_name = @template[:name]
-              else
-                pipeline_name = @instance[:uuid]
-              end
-              if c[:output_name] != false
-                # Create a collection located in the same project as the pipeline with the contents of the output.
-                portable_data_hash = c[:job][:output]
-                collections = $arv.collection.list(limit: 1,
-                                                   filters: [['portable_data_hash', '=', portable_data_hash]],
-                                                   select: ["portable_data_hash", "manifest_text"]
-                                                   )[:items]
-                if collections.any?
-                  name = c[:output_name] || "Output #{portable_data_hash[0..7]} of #{cname} of #{pipeline_name}"
-
-                  # check if there is a name collision.
-                  name_collisions = $arv.collection.list(filters: [["owner_uuid", "=", owner_uuid],
-                                                                   ["name", "=", name]])[:items]
-
-                  newcollection_actual = nil
-                  if name_collisions.any? and name_collisions.first[:portable_data_hash] == portable_data_hash
-                    # There is already a collection with the same name and the
-                    # same contents, so just point to that.
-                    newcollection_actual = name_collisions.first
-                  end
-
-                  if newcollection_actual.nil?
-                    # Did not find a collection with the same name (or the
-                    # collection has a different portable data hash) so create
-                    # a new collection with ensure_unique_name: true.
-                    newcollection = {
-                      owner_uuid: owner_uuid,
-                      name: name,
-                      portable_data_hash: collections.first[:portable_data_hash],
-                      manifest_text: collections.first[:manifest_text]
-                    }
-                    debuglog "Creating collection #{newcollection}", 0
-                    newcollection_actual = $arv.collection.create collection: newcollection, ensure_unique_name: true
-                  end
-
-                  c[:output_uuid] = newcollection_actual[:uuid]
-                else
-                  debuglog "Could not find a collection with portable data hash #{portable_data_hash}", 0
-                end
-              end
-            end
-          elsif ["Queued", "Running"].include? c[:job][:state]
-            # Job is running or queued to run, so indicate that pipeline
-            # should continue to run
-            moretodo = true
-          elsif c[:job][:state] == "Cancelled"
-            debuglog "component #{cname} job #{c[:job][:uuid]} cancelled."
-            moretodo = false
-          elsif c[:job][:state] == "Failed"
-            moretodo = false
-          end
-        end
-      end
-      @instance[:components] = @components
-      report_status
-
-      if @options[:no_wait]
-        moretodo = false
-      end
-
-      # If job creation fails, just give up on this pipeline instance.
-      if job_creation_failed > 0
-        moretodo = false
-      end
-
-      if moretodo
-        begin
-          sleep 10
-        rescue Interrupt
-          debuglog "interrupt", 0
-          interrupted = true
-          break
-        end
-      end
-    end
-
-    c_in_state = @components.values.group_by { |c|
-      c[:job] and c[:job][:state]
-    }
-    succeeded = c_in_state["Complete"].andand.count || 0
-    failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0)
-    ended = succeeded + failed
-
-    success = (succeeded == @components.length)
-
-    # A job create call failed. Just give up.
-    if job_creation_failed > 0
-      debuglog "job creation failed - giving up on this pipeline instance", 0
-      success = false
-      failed += 1
-    end
-
-    if interrupted
-     if success
-        @instance[:state] = 'Complete'
-     else
-        @instance[:state] = 'Paused'
-     end
-    else
-      if ended == @components.length or failed > 0
-        @instance[:state] = success ? 'Complete' : 'Failed'
-      end
-    end
-
-    if @instance[:finished_at].nil? and ['Complete', 'Failed'].include? @instance[:state]
-      @instance[:finished_at] = Time.now
-    end
-
-    debuglog "pipeline instance state is #{@instance[:state]}"
-
-    # set components_summary
-    components_summary = {"todo" => @components.length - ended, "done" => succeeded, "failed" => failed}
-    @instance[:components_summary] = components_summary
-
-    @instance.save
-  end
-
-  def cleanup
-    if @instance and @instance[:state] == 'RunningOnClient'
-      @instance[:state] = 'Paused'
-      @instance.save
-    end
-  end
-
-  def uuid
-    @instance[:uuid]
-  end
-
-  protected
-
-  def report_status
-    @instance.save
-
-    if @options[:status_json] != '/dev/null'
-      File.open(@options[:status_json], 'w') do |f|
-        f.puts @components.pretty_inspect
-      end
-    end
-
-    if @options[:status_text] != '/dev/null'
-      File.open(@options[:status_text], 'w') do |f|
-        f.puts ""
-        f.puts "#{Time.now} -- pipeline_instance #{@instance[:uuid]}"
-        namewidth = @components.collect { |cname, c| cname.size }.max
-        @components.each do |cname, c|
-          jstatus = if !c[:job]
-                      "-"
-                    else case c[:job][:state]
-                         when "Running"
-                           "#{c[:job][:tasks_summary].inspect}"
-                         when "Complete"
-                           c[:job][:output]
-                         when "Cancelled"
-                           "cancelled #{c[:job][:cancelled_at]}"
-                         when "Failed"
-                           "failed #{c[:job][:finished_at]}"
-                         when "Queued"
-                           "queued #{c[:job][:created_at]}"
-                         end
-                    end
-          f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}"
-        end
-      end
-    end
-  end
-
-  def abort(msg)
-    if @instance
-      if ["New", "Ready", "RunningOnClient",
-          "RunningOnServer"].include?(@instance[:state])
-        @instance[:state] = "Failed"
-        @instance[:finished_at] = Time.now
-        @instance.save
-      end
-      @instance.log_stderr(msg)
-    end
-    Kernel::abort(msg)
-  end
-end
-
-runner = WhRunPipelineInstance.new($options)
-begin
-  if $options[:template]
-    runner.fetch_template($options[:template])
-  else
-    runner.fetch_instance($options[:instance])
-  end
-  runner.apply_parameters(p.leftovers)
-  runner.setup_instance
-  if $options[:submit]
-    runner.instance.save
-    puts runner.instance[:uuid]
-  else
-    runner.run
-  end
-rescue Exception => e
-  runner.cleanup
-  raise e
-end
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
deleted file mode 100755 (executable)
index 242dff7..0000000
+++ /dev/null
@@ -1,2577 +0,0 @@
-#!/usr/bin/env perl
-# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*-
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-=head1 NAME
-
-crunch-job: Execute job steps, save snapshots as requested, collate output.
-
-=head1 SYNOPSIS
-
-Obtain job details from Arvados, run tasks on compute nodes (typically
-invoked by scheduler on controller):
-
- crunch-job --job x-y-z --git-dir /path/to/repo/.git
-
-Obtain job details from command line, run tasks on local machine
-(typically invoked by application or developer on VM):
-
- crunch-job --job '{"script_version":"/path/to/working/tree","script":"scriptname",...}'
-
- crunch-job --job '{"repository":"https://github.com/curoverse/arvados.git","script_version":"master","script":"scriptname",...}'
-
-=head1 OPTIONS
-
-=over
-
-=item --force-unlock
-
-If the job is already locked, steal the lock and run it anyway.
-
-=item --git-dir
-
-Path to a .git directory (or a git URL) where the commit given in the
-job's C<script_version> attribute is to be found. If this is I<not>
-given, the job's C<repository> attribute will be used.
-
-=item --job-api-token
-
-Arvados API authorization token to use during the course of the job.
-
-=item --no-clear-tmp
-
-Do not clear per-job/task temporary directories during initial job
-setup. This can speed up development and debugging when running jobs
-locally.
-
-=item --job
-
-UUID of the job to run, or a JSON-encoded job resource without a
-UUID. If the latter is given, a new job object will be created.
-
-=back
-
-=head1 RUNNING JOBS LOCALLY
-
-crunch-job's log messages appear on stderr along with the job tasks'
-stderr streams. The log is saved in Keep at each checkpoint and when
-the job finishes.
-
-If the job succeeds, the job's output locator is printed on stdout.
-
-While the job is running, the following signals are accepted:
-
-=over
-
-=item control-C, SIGINT, SIGQUIT
-
-Save a checkpoint, terminate any job tasks that are running, and stop.
-
-=item SIGALRM
-
-Save a checkpoint and continue.
-
-=item SIGHUP
-
-Refresh node allocation (i.e., check whether any nodes have been added
-or unallocated) and attributes of the Job record that should affect
-behavior (e.g., cancel job if cancelled_at becomes non-nil).
-
-=back
-
-=cut
-
-
-use strict;
-use POSIX ':sys_wait_h';
-use POSIX qw(strftime);
-use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
-use Arvados;
-use Cwd qw(realpath);
-use Data::Dumper;
-use Digest::MD5 qw(md5_hex);
-use Getopt::Long;
-use IPC::Open2;
-use IO::Select;
-use File::Temp;
-use Fcntl ':flock';
-use File::Path qw( make_path remove_tree );
-
-use constant TASK_TEMPFAIL => 111;
-use constant EX_TEMPFAIL => 75;
-use constant EX_RETRY_UNLOCKED => 93;
-
-$ENV{"TMPDIR"} ||= "/tmp";
-unless (defined $ENV{"CRUNCH_TMP"}) {
-  $ENV{"CRUNCH_TMP"} = $ENV{"TMPDIR"} . "/crunch-job";
-  if ($ENV{"USER"} ne "crunch" && $< != 0) {
-    # use a tmp dir unique for my uid
-    $ENV{"CRUNCH_TMP"} .= "-$<";
-  }
-}
-
-# Create the tmp directory if it does not exist
-if ( ! -d $ENV{"CRUNCH_TMP"} ) {
-  make_path $ENV{"CRUNCH_TMP"} or die "Failed to create temporary working directory: " . $ENV{"CRUNCH_TMP"};
-}
-
-$ENV{"JOB_WORK"} = $ENV{"CRUNCH_TMP"} . "/work";
-$ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
-$ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
-mkdir ($ENV{"JOB_WORK"});
-
-my %proc;
-my $force_unlock;
-my $git_dir;
-my $jobspec;
-my $job_api_token;
-my $no_clear_tmp;
-my $resume_stash;
-my $cgroup_root = "/sys/fs/cgroup";
-my $docker_bin = "docker.io";
-my $docker_run_args = "";
-my $srun_sync_timeout = 15*60;
-GetOptions('force-unlock' => \$force_unlock,
-           'git-dir=s' => \$git_dir,
-           'job=s' => \$jobspec,
-           'job-api-token=s' => \$job_api_token,
-           'no-clear-tmp' => \$no_clear_tmp,
-           'resume-stash=s' => \$resume_stash,
-           'cgroup-root=s' => \$cgroup_root,
-           'docker-bin=s' => \$docker_bin,
-           'docker-run-args=s' => \$docker_run_args,
-           'srun-sync-timeout=i' => \$srun_sync_timeout,
-    );
-
-if (defined $job_api_token) {
-  $ENV{ARVADOS_API_TOKEN} = $job_api_token;
-}
-
-my $have_slurm = exists $ENV{SLURM_JOB_ID} && exists $ENV{SLURM_NODELIST};
-
-
-$SIG{'USR1'} = sub
-{
-  $main::ENV{CRUNCH_DEBUG} = 1;
-};
-$SIG{'USR2'} = sub
-{
-  $main::ENV{CRUNCH_DEBUG} = 0;
-};
-
-my $arv = Arvados->new('apiVersion' => 'v1');
-
-my $Job;
-my $job_id;
-my $dbh;
-my $sth;
-my @jobstep;
-
-my $local_job;
-if ($jobspec =~ /^[-a-z\d]+$/)
-{
-  # $jobspec is an Arvados UUID, not a JSON job specification
-  $Job = api_call("jobs/get", uuid => $jobspec);
-  $local_job = 0;
-}
-else
-{
-  $local_job = JSON::decode_json($jobspec);
-}
-
-
-# Make sure our workers (our slurm nodes, localhost, or whatever) are
-# at least able to run basic commands: they aren't down or severely
-# misconfigured.
-my $cmd = ['true'];
-if (($Job || $local_job)->{docker_image_locator}) {
-  $cmd = [$docker_bin, 'ps', '-q'];
-}
-Log(undef, "Sanity check is `@$cmd`");
-my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-  ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
-  $cmd,
-  {label => "sanity check"});
-if ($exited != 0) {
-  Log(undef, "Sanity check failed: ".exit_status_s($exited));
-  exit EX_TEMPFAIL;
-}
-Log(undef, "Sanity check OK");
-
-
-my $User = api_call("users/current");
-
-if (!$local_job) {
-  if (!$force_unlock) {
-    # Claim this job, and make sure nobody else does
-    eval { api_call("jobs/lock", uuid => $Job->{uuid}); };
-    if ($@) {
-      Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
-      exit EX_TEMPFAIL;
-    };
-  }
-}
-else
-{
-  if (!$resume_stash)
-  {
-    map { croak ("No $_ specified") unless $local_job->{$_} }
-    qw(script script_version script_parameters);
-  }
-
-  $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
-  $local_job->{'started_at'} = gmtime;
-  $local_job->{'state'} = 'Running';
-
-  $Job = api_call("jobs/create", job => $local_job);
-}
-$job_id = $Job->{'uuid'};
-
-my $keep_logfile = $job_id . '.log.txt';
-log_writer_start($keep_logfile);
-
-$Job->{'runtime_constraints'} ||= {};
-$Job->{'runtime_constraints'}->{'max_tasks_per_node'} ||= 0;
-my $max_ncpus = $Job->{'runtime_constraints'}->{'max_tasks_per_node'};
-
-my $gem_versions = `gem list --quiet arvados-cli 2>/dev/null`;
-if ($? == 0) {
-  $gem_versions =~ s/^arvados-cli \(/ with arvados-cli Gem version(s) /;
-  chomp($gem_versions);
-  chop($gem_versions);  # Closing parentheses
-} else {
-  $gem_versions = "";
-}
-Log(undef,
-    "running from " . ((-e $0) ? realpath($0) : "stdin") . $gem_versions);
-
-Log (undef, "check slurm allocation");
-my @slot;
-my @node;
-# Should use $ENV{SLURM_TASKS_PER_NODE} instead of sinfo? (eg. "4(x3),2,4(x2)")
-my @sinfo;
-if (!$have_slurm)
-{
-  my $localcpus = 0 + `grep -cw ^processor /proc/cpuinfo` || 1;
-  push @sinfo, "$localcpus localhost";
-}
-if (exists $ENV{SLURM_NODELIST})
-{
-  push @sinfo, `sinfo -h --format='%c %N' --nodes=\Q$ENV{SLURM_NODELIST}\E`;
-}
-foreach (@sinfo)
-{
-  my ($ncpus, $slurm_nodelist) = split;
-  $ncpus = $max_ncpus if $max_ncpus && $ncpus > $max_ncpus;
-
-  my @nodelist;
-  while ($slurm_nodelist =~ s/^([^\[,]+?(\[.*?\])?)(,|$)//)
-  {
-    my $nodelist = $1;
-    if ($nodelist =~ /\[((\d+)(-(\d+))?(,(\d+)(-(\d+))?)*)\]/)
-    {
-      my $ranges = $1;
-      foreach (split (",", $ranges))
-      {
-       my ($a, $b);
-       if (/(\d+)-(\d+)/)
-       {
-         $a = $1;
-         $b = $2;
-       }
-       else
-       {
-         $a = $_;
-         $b = $_;
-       }
-       push @nodelist, map {
-         my $n = $nodelist;
-         $n =~ s/\[[-,\d]+\]/$_/;
-         $n;
-       } ($a..$b);
-      }
-    }
-    else
-    {
-      push @nodelist, $nodelist;
-    }
-  }
-  foreach my $nodename (@nodelist)
-  {
-    Log (undef, "node $nodename - $ncpus slots");
-    my $node = { name => $nodename,
-                 ncpus => $ncpus,
-                 # The number of consecutive times a task has been dispatched
-                 # to this node and failed.
-                 losing_streak => 0,
-                 # The number of consecutive times that SLURM has reported
-                 # a node failure since the last successful task.
-                 fail_count => 0,
-                 # Don't dispatch work to this node until this time
-                 # (in seconds since the epoch) has passed.
-                 hold_until => 0 };
-    foreach my $cpu (1..$ncpus)
-    {
-      push @slot, { node => $node,
-                   cpu => $cpu };
-    }
-  }
-  push @node, @nodelist;
-}
-
-
-
-# Ensure that we get one jobstep running on each allocated node before
-# we start overloading nodes with concurrent steps
-
-@slot = sort { $a->{cpu} <=> $b->{cpu} } @slot;
-
-
-$Job->update_attributes(
-  'tasks_summary' => { 'failed' => 0,
-                       'todo' => 1,
-                       'running' => 0,
-                       'done' => 0 });
-
-Log (undef, "start");
-$SIG{'INT'} = sub { $main::please_freeze = 1; };
-$SIG{'QUIT'} = sub { $main::please_freeze = 1; };
-$SIG{'TERM'} = \&croak;
-$SIG{'TSTP'} = sub { $main::please_freeze = 1; };
-$SIG{'ALRM'} = sub { $main::please_info = 1; };
-$SIG{'CONT'} = sub { $main::please_continue = 1; };
-$SIG{'HUP'} = sub { $main::please_refresh = 1; };
-
-$main::please_freeze = 0;
-$main::please_info = 0;
-$main::please_continue = 0;
-$main::please_refresh = 0;
-my $jobsteps_must_output_keys = 0;     # becomes 1 when any task outputs a key
-
-grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
-$ENV{"CRUNCH_JOB_UUID"} = $job_id;
-$ENV{"JOB_UUID"} = $job_id;
-
-
-my @jobstep_todo = ();
-my @jobstep_done = ();
-my @jobstep_tomerge = ();
-my $jobstep_tomerge_level = 0;
-my $squeue_checked = 0;
-my $sinfo_checked = 0;
-my $latest_refresh = scalar time;
-
-
-
-if (defined $Job->{thawedfromkey})
-{
-  thaw ($Job->{thawedfromkey});
-}
-else
-{
-  my $first_task = api_call("job_tasks/create", job_task => {
-    'job_uuid' => $Job->{'uuid'},
-    'sequence' => 0,
-    'qsequence' => 0,
-    'parameters' => {},
-  });
-  push @jobstep, { 'level' => 0,
-                  'failures' => 0,
-                   'arvados_task' => $first_task,
-                };
-  push @jobstep_todo, 0;
-}
-
-
-if (!$have_slurm)
-{
-  must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
-}
-
-my $build_script = handle_readall(\*DATA);
-my $nodelist = join(",", @node);
-my $git_tar_count = 0;
-
-if (!defined $no_clear_tmp) {
-  # Find FUSE mounts under $CRUNCH_TMP and unmount them.  Then clean
-  # up work directories crunch_tmp/work, crunch_tmp/opt,
-  # crunch_tmp/src*.
-  my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-    ['bash', '-ec', q{
-arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
-rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRUNCH_TMP}/*.cid
-    }],
-    {label => "clean work dirs"});
-  if ($exited != 0) {
-    exit_retry_unlocked();
-  }
-}
-
-# If this job requires a Docker image, install that.
-my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
-if ($docker_locator = $Job->{docker_image_locator}) {
-  Log (undef, "Install docker image $docker_locator");
-  ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
-  if (!$docker_hash)
-  {
-    croak("No Docker image hash found from locator $docker_locator");
-  }
-  Log (undef, "docker image hash is $docker_hash");
-  $docker_stream =~ s/^\.//;
-  my $docker_install_script = qq{
-loaded() {
-  id=\$($docker_bin inspect --format="{{.ID}}" \Q$docker_hash\E) || return 1
-  echo "image ID is \$id"
-  [[ \${id} = \Q$docker_hash\E ]]
-}
-if loaded >&2 2>/dev/null; then
-  echo >&2 "image is already present"
-  exit 0
-fi
-echo >&2 "docker image is not present; loading"
-arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
-if ! loaded >&2; then
-  echo >&2 "`docker load` exited 0, but image is not found (!)"
-  exit 1
-fi
-echo >&2 "image loaded successfully"
-};
-
-  my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodelist=" . join(',', @node)],
-    ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
-    {label => "load docker image"});
-  if ($exited != 0)
-  {
-    exit_retry_unlocked();
-  }
-
-  # Determine whether this version of Docker supports memory+swap limits.
-  ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodes=1"],
-    [$docker_bin, 'run', '--help'],
-    {label => "check --memory-swap feature"});
-  if ($tempfail) {
-    exit_retry_unlocked();
-  }
-  $docker_limitmem = ($stdout =~ /--memory-swap/);
-
-  # Find a non-root Docker user to use.
-  # Tries the default user for the container, then 'crunch', then 'nobody',
-  # testing for whether the actual user id is non-zero.  This defends against
-  # mistakes but not malice, but we intend to harden the security in the future
-  # so we don't want anyone getting used to their jobs running as root in their
-  # Docker containers.
-  my @tryusers = ("", "crunch", "nobody");
-  foreach my $try_user (@tryusers) {
-    my $label;
-    my $try_user_arg;
-    if ($try_user eq "") {
-      $label = "check whether default user is UID 0";
-      $try_user_arg = "";
-    } else {
-      $label = "check whether user '$try_user' is UID 0";
-      $try_user_arg = "--user=$try_user";
-    }
-    my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-      ["srun", "--nodes=1"],
-      ["/bin/sh", "-ec",
-       "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
-      {label => $label});
-    chomp($stdout);
-    if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
-      $dockeruserarg = $try_user_arg;
-      if ($try_user eq "") {
-        Log(undef, "Container will run with default user");
-      } else {
-        Log(undef, "Container will run with $dockeruserarg");
-      }
-      last;
-    } elsif ($tempfail) {
-      exit_retry_unlocked();
-    }
-  }
-
-  if (!defined $dockeruserarg) {
-    croak("Could not find a user in container that is not UID 0 (tried default user, @tryusers) or there was a problem running 'id' in the container.");
-  }
-
-  if ($Job->{arvados_sdk_version}) {
-    # The job also specifies an Arvados SDK version.  Add the SDKs to the
-    # tar file for the build script to install.
-    Log(undef, sprintf("Packing Arvados SDK version %s for installation",
-                       $Job->{arvados_sdk_version}));
-    add_git_archive("git", "--git-dir=$git_dir", "archive",
-                    "--prefix=.arvados.sdk/",
-                    $Job->{arvados_sdk_version}, "sdk");
-  }
-}
-
-if (!defined $git_dir && $Job->{'script_version'} =~ m{^/}) {
-  # If script_version looks like an absolute path, *and* the --git-dir
-  # argument was not given -- which implies we were not invoked by
-  # crunch-dispatch -- we will use the given path as a working
-  # directory instead of resolving script_version to a git commit (or
-  # doing anything else with git).
-  $ENV{"CRUNCH_SRC_COMMIT"} = $Job->{'script_version'};
-  $ENV{"CRUNCH_SRC"} = $Job->{'script_version'};
-}
-else {
-  # Resolve the given script_version to a git commit sha1. Also, if
-  # the repository is remote, clone it into our local filesystem: this
-  # ensures "git archive" will work, and is necessary to reliably
-  # resolve a symbolic script_version like "master^".
-  $ENV{"CRUNCH_SRC"} = "$ENV{CRUNCH_TMP}/src";
-
-  Log (undef, "Looking for version ".$Job->{script_version}." from repository ".$Job->{repository});
-
-  $ENV{"CRUNCH_SRC_COMMIT"} = $Job->{script_version};
-
-  # If we're running under crunch-dispatch, it will have already
-  # pulled the appropriate source tree into its own repository, and
-  # given us that repo's path as $git_dir.
-  #
-  # If we're running a "local" job, we might have to fetch content
-  # from a remote repository.
-  #
-  # (Currently crunch-dispatch gives a local path with --git-dir, but
-  # we might as well accept URLs there too in case it changes its
-  # mind.)
-  my $repo = $git_dir || $Job->{'repository'};
-
-  # Repository can be remote or local. If remote, we'll need to fetch it
-  # to a local dir before doing `git log` et al.
-  my $repo_location;
-
-  if ($repo =~ m{://|^[^/]*:}) {
-    # $repo is a git url we can clone, like git:// or https:// or
-    # file:/// or [user@]host:repo.git. Note "user/name@host:foo" is
-    # not recognized here because distinguishing that from a local
-    # path is too fragile. If you really need something strange here,
-    # use the ssh:// form.
-    $repo_location = 'remote';
-  } elsif ($repo =~ m{^\.*/}) {
-    # $repo is a local path to a git index. We'll also resolve ../foo
-    # to ../foo/.git if the latter is a directory. To help
-    # disambiguate local paths from named hosted repositories, this
-    # form must be given as ./ or ../ if it's a relative path.
-    if (-d "$repo/.git") {
-      $repo = "$repo/.git";
-    }
-    $repo_location = 'local';
-  } else {
-    # $repo is none of the above. It must be the name of a hosted
-    # repository.
-    my $arv_repo_list = api_call("repositories/list",
-                                 'filters' => [['name','=',$repo]]);
-    my @repos_found = @{$arv_repo_list->{'items'}};
-    my $n_found = $arv_repo_list->{'serverResponse'}->{'items_available'};
-    if ($n_found > 0) {
-      Log(undef, "Repository '$repo' -> "
-          . join(", ", map { $_->{'uuid'} } @repos_found));
-    }
-    if ($n_found != 1) {
-      croak("Error: Found $n_found repositories with name '$repo'.");
-    }
-    $repo = $repos_found[0]->{'fetch_url'};
-    $repo_location = 'remote';
-  }
-  Log(undef, "Using $repo_location repository '$repo'");
-  $ENV{"CRUNCH_SRC_URL"} = $repo;
-
-  # Resolve given script_version (we'll call that $treeish here) to a
-  # commit sha1 ($commit).
-  my $treeish = $Job->{'script_version'};
-  my $commit;
-  if ($repo_location eq 'remote') {
-    # We minimize excess object-fetching by re-using the same bare
-    # repository in CRUNCH_TMP/.git for multiple crunch-jobs -- we
-    # just keep adding remotes to it as needed.
-    my $local_repo = $ENV{'CRUNCH_TMP'}."/.git";
-    my $gitcmd = "git --git-dir=\Q$local_repo\E";
-
-    # Set up our local repo for caching remote objects, making
-    # archives, etc.
-    if (!-d $local_repo) {
-      make_path($local_repo) or croak("Error: could not create $local_repo");
-    }
-    # This works (exits 0 and doesn't delete fetched objects) even
-    # if $local_repo is already initialized:
-    `$gitcmd init --bare`;
-    if ($?) {
-      croak("Error: $gitcmd init --bare exited ".exit_status_s($?));
-    }
-
-    # If $treeish looks like a hash (or abbrev hash) we look it up in
-    # our local cache first, since that's cheaper. (We don't want to
-    # do that with tags/branches though -- those change over time, so
-    # they should always be resolved by the remote repo.)
-    if ($treeish =~ /^[0-9a-f]{7,40}$/s) {
-      # Hide stderr because it's normal for this to fail:
-      my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E 2>/dev/null`;
-      if ($? == 0 &&
-          # Careful not to resolve a branch named abcdeff to commit 1234567:
-          $sha1 =~ /^$treeish/ &&
-          $sha1 =~ /^([0-9a-f]{40})$/s) {
-        $commit = $1;
-        Log(undef, "Commit $commit already present in $local_repo");
-      }
-    }
-
-    if (!defined $commit) {
-      # If $treeish isn't just a hash or abbrev hash, or isn't here
-      # yet, we need to fetch the remote to resolve it correctly.
-
-      # First, remove all local heads. This prevents a name that does
-      # not exist on the remote from resolving to (or colliding with)
-      # a previously fetched branch or tag (possibly from a different
-      # remote).
-      remove_tree("$local_repo/refs/heads", {keep_root => 1});
-
-      Log(undef, "Fetching objects from $repo to $local_repo");
-      `$gitcmd fetch --no-progress --tags ''\Q$repo\E \Q+refs/heads/*:refs/heads/*\E`;
-      if ($?) {
-        croak("Error: `$gitcmd fetch` exited ".exit_status_s($?));
-      }
-    }
-
-    # Now that the data is all here, we will use our local repo for
-    # the rest of our git activities.
-    $repo = $local_repo;
-  }
-
-  my $gitcmd = "git --git-dir=\Q$repo\E";
-  my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E`;
-  unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
-    croak("`$gitcmd rev-list` exited "
-          .exit_status_s($?)
-          .", '$treeish' not found, giving up");
-  }
-  $commit = $1;
-  Log(undef, "Version $treeish is commit $commit");
-
-  if ($commit ne $Job->{'script_version'}) {
-    # Record the real commit id in the database, frozentokey, logs,
-    # etc. -- instead of an abbreviation or a branch name which can
-    # become ambiguous or point to a different commit in the future.
-    if (!$Job->update_attributes('script_version' => $commit)) {
-      croak("Error: failed to update job's script_version attribute");
-    }
-  }
-
-  $ENV{"CRUNCH_SRC_COMMIT"} = $commit;
-  add_git_archive("$gitcmd archive ''\Q$commit\E");
-}
-
-my $git_archive = combined_git_archive();
-if (!defined $git_archive) {
-  Log(undef, "Skip install phase (no git archive)");
-  if ($have_slurm) {
-    Log(undef, "Warning: This probably means workers have no source tree!");
-  }
-}
-else {
-  my $exited;
-  my $install_script_tries_left = 3;
-  for (my $attempts = 0; $attempts < $install_script_tries_left; $attempts++) {
-    my @srunargs = ("srun",
-                    "--nodelist=$nodelist",
-                    "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
-    my @execargs = ("sh", "-c",
-                    "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
-
-    $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
-    my ($stdout, $stderr, $tempfail);
-    ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-      \@srunargs, \@execargs,
-      {label => "run install script on all workers"},
-        $build_script . $git_archive);
-    if ($tempfail) {
-      exit_retry_unlocked();
-    }
-
-    my $stderr_anything_from_script = 0;
-    for my $line (split(/\n/, $stderr)) {
-      if ($line !~ /^(srun: error: |starting: \[)/) {
-        $stderr_anything_from_script = 1;
-      }
-    }
-
-    last if $exited == 0 || $main::please_freeze;
-
-    # If the install script fails but doesn't print an error message,
-    # the next thing anyone is likely to do is just run it again in
-    # case it was a transient problem like "slurm communication fails
-    # because the network isn't reliable enough". So we'll just do
-    # that ourselves (up to 3 attempts in total). OTOH, if there is an
-    # error message, the problem is more likely to have a real fix and
-    # we should fail the job so the fixing process can start, instead
-    # of doing 2 more attempts.
-    last if $stderr_anything_from_script;
-  }
-
-  foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
-    unlink($tar_filename);
-  }
-
-  if ($exited != 0) {
-    croak("Giving up");
-  }
-}
-
-foreach (qw (script script_version script_parameters runtime_constraints))
-{
-  Log (undef,
-       "$_ " .
-       (ref($Job->{$_}) ? JSON::encode_json($Job->{$_}) : $Job->{$_}));
-}
-foreach (split (/\n/, $Job->{knobs}))
-{
-  Log (undef, "knob " . $_);
-}
-my $resp = api_call(
-  'nodes/list',
-  'filters' => [['hostname', 'in', \@node]],
-  'order' => 'hostname',
-  'limit' => scalar(@node),
-    );
-for my $n (@{$resp->{items}}) {
-  Log(undef, "$n->{hostname} $n->{uuid} ".JSON::encode_json($n->{properties}));
-}
-
-
-
-$main::success = undef;
-
-
-
-ONELEVEL:
-
-my $thisround_succeeded = 0;
-my $thisround_failed = 0;
-my $thisround_failed_multiple = 0;
-my $working_slot_count = scalar(@slot);
-
-@jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
-                      or $a <=> $b } @jobstep_todo;
-my $level = $jobstep[$jobstep_todo[0]]->{level};
-
-my $initial_tasks_this_level = 0;
-foreach my $id (@jobstep_todo) {
-  $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
-}
-
-# If the number of tasks scheduled at this level #T is smaller than the number
-# of slots available #S, only use the first #T slots, or the first slot on
-# each node, whichever number is greater.
-#
-# When we dispatch tasks later, we'll allocate whole-node resources like RAM
-# based on these numbers.  Using fewer slots makes more resources available
-# to each individual task, which should normally be a better strategy when
-# there are fewer of them running with less parallelism.
-#
-# Note that this calculation is not redone if the initial tasks at
-# this level queue more tasks at the same level.  This may harm
-# overall task throughput for that level.
-my @freeslot;
-if ($initial_tasks_this_level < @node) {
-  @freeslot = (0..$#node);
-} elsif ($initial_tasks_this_level < @slot) {
-  @freeslot = (0..$initial_tasks_this_level - 1);
-} else {
-  @freeslot = (0..$#slot);
-}
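-# Editorial sketch (not from the original script): a worked example of the
-# slot selection above, assuming @slot is ordered as described in the
-# comment -- the first slot on each node comes first.  With 4 nodes of
-# 8 CPUs each (32 slots total):
-#   *  2 initial tasks -> @freeslot = (0..3),  one slot per node;
-#   * 12 initial tasks -> @freeslot = (0..11), the first 12 slots;
-#   * 40 initial tasks -> @freeslot = (0..31), every slot.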
-my $round_num_freeslots = scalar(@freeslot);
-print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
-
-my %round_max_slots = ();
-for (my $ii = $#freeslot; $ii >= 0; $ii--) {
-  my $this_slot = $slot[$freeslot[$ii]];
-  my $node_name = $this_slot->{node}->{name};
-  $round_max_slots{$node_name} ||= $this_slot->{cpu};
-  last if (scalar(keys(%round_max_slots)) >= @node);
-}
-
-Log(undef, "start level $level with $round_num_freeslots slots");
-my @holdslot;
-my %reader;
-my $progress_is_dirty = 1;
-my $progress_stats_updated = 0;
-
-update_progress_stats();
-
-
-THISROUND:
-for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
-{
-  # Don't create new tasks if we already know the job's final result.
-  last if defined($main::success);
-
-  my $id = $jobstep_todo[$todo_ptr];
-  my $Jobstep = $jobstep[$id];
-  if ($Jobstep->{level} != $level)
-  {
-    next;
-  }
-
-  pipe $reader{$id}, "writer" or croak("pipe() failed: $!");
-  set_nonblocking($reader{$id});
-
-  my $childslot = $freeslot[0];
-  my $childnode = $slot[$childslot]->{node};
-  my $childslotname = join (".",
-                           $slot[$childslot]->{node}->{name},
-                           $slot[$childslot]->{cpu});
-
-  my $childpid = fork();
-  if ($childpid == 0)
-  {
-    $SIG{'INT'} = 'DEFAULT';
-    $SIG{'QUIT'} = 'DEFAULT';
-    $SIG{'TERM'} = 'DEFAULT';
-
-    foreach (values (%reader))
-    {
-      close($_);
-    }
-    fcntl ("writer", F_SETFL, 0) or croak ($!); # no close-on-exec
-    open(STDOUT,">&writer") or croak ($!);
-    open(STDERR,">&writer") or croak ($!);
-
-    undef $dbh;
-    undef $sth;
-
-    delete $ENV{"GNUPGHOME"};
-    $ENV{"TASK_UUID"} = $Jobstep->{'arvados_task'}->{'uuid'};
-    $ENV{"TASK_QSEQUENCE"} = $id;
-    $ENV{"TASK_SEQUENCE"} = $level;
-    $ENV{"JOB_SCRIPT"} = $Job->{script};
-    while (my ($param, $value) = each %{$Job->{script_parameters}}) {
-      $param =~ tr/a-z/A-Z/;
-      $ENV{"JOB_PARAMETER_$param"} = $value;
-    }
-    $ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name};
-    $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
-    $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
-    $ENV{"HOME"} = $ENV{"TASK_WORK"};
-    $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-    $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
-    $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
-
-    my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
-
-    $ENV{"GZIP"} = "-n";
-
-    my @srunargs = (
-      "srun",
-      "--nodelist=".$childnode->{name},
-      qw(-n1 -c1 -N1 -D), $ENV{'TMPDIR'},
-      "--job-name=$job_id.$id.$$",
-       );
-
-    my $stdbuf = " stdbuf --output=0 --error=0 ";
-
-    my $arv_file_cache = "";
-    if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) {
-      $arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024);
-    }
-
-    my $command =
-       "if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
-        ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
-       ."&& cd \Q$ENV{CRUNCH_TMP}\E "
-        # These environment variables get used explicitly later in
-        # $command.  No tool is expected to read these values directly.
-        .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
-        .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
-        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
-        .q{&& declare -a VOLUMES=() }
-        .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner:ro") ; fi }
-        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt:ro") ; }
-        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt:ro") ; fi };
-
-    $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
-    $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
-    $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
-
-    if ($docker_hash)
-    {
-      my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
-      my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
-      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
-      # We only set memory limits if Docker lets us limit both memory and swap.
-      # Memory limits alone have been supported longer, but subprocesses tend
-      # to get SIGKILL if they exceed that without any swap limit set.
-      # See #5642 for additional background.
-      if ($docker_limitmem) {
-        $command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
-      }
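-      # Editorial worked example (not from the original script): MEM and
-      # SWAP above are read from /proc/meminfo in kB.  On a node with
-      # 64 GiB RAM (MEM = 67108864 kB) and CRUNCH_NODE_SLOTS = 8,
-      # MEMLIMIT = (67108864 * 95) / (8 * 100) = 7969177 kB, roughly
-      # 7.6 GiB per task, and SWAPLIMIT = MEMLIMIT + SWAP.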
-
-      # The source tree and $destdir directory (which we have
-      # installed on the worker host) are available in the container,
-      # under the same path.
-      $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
-      $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
-
-      # Currently, we make the "by_pdh" directory in arv-mount's mount
-      # point appear at /keep inside the container (instead of using
-      # the same path as the host like we do with CRUNCH_SRC and
-      # CRUNCH_INSTALL). However, crunch scripts and utilities must
-      # not rely on this. They must use $TASK_KEEPMOUNT.
-      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
-      $ENV{TASK_KEEPMOUNT} = "/keep";
-
-      # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
-      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp\E ";
-      $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
-
-      # TASK_WORK is almost exactly like a docker data volume: it
-      # starts out empty, is writable, and persists until no
-      # containers use it any more. We don't use --volumes-from to
-      # share it with other containers: it is only accessible to this
-      # task, and it goes away when this task stops.
-      #
-      # However, a docker data volume is writable only by root unless
-      # the mount point already happens to exist in the container with
-      # different permissions. Therefore, we [1] assume /tmp already
-      # exists in the image and is writable by the crunch user; [2]
-      # avoid putting TASK_WORK inside CRUNCH_TMP (which won't be
-      # writable if they are created by docker while setting up the
-      # other --volumes); and [3] create $TASK_WORK inside the
-      # container using $build_script.
-      $command .= "--volume=/tmp ";
-      $ENV{"TASK_WORK"} = "/tmp/crunch-job-task-work/$childslotname";
-      $ENV{"HOME"} = $ENV{"TASK_WORK"};
-      $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-
-      # TODO: Share a single JOB_WORK volume across all task
-      # containers on a given worker node, and delete it when the job
-      # ends (and, in case that doesn't work, when the next job
-      # starts).
-      #
-      # For now, use the same approach as TASK_WORK above.
-      $ENV{"JOB_WORK"} = "/tmp/crunch-job-work";
-
-      # Bind mount the crunchrunner binary and host TLS certificates file into
-      # the container.
-      $command .= '"${VOLUMES[@]}" ';
-
-      while (my ($env_key, $env_val) = each %ENV)
-      {
-        if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
-          $command .= "--env=\Q$env_key=$env_val\E ";
-        }
-      }
-      $command .= "--env=\QHOME=$ENV{HOME}\E ";
-      $command .= "\Q$docker_hash\E ";
-
-      if ($Job->{arvados_sdk_version}) {
-        $command .= $stdbuf;
-        $command .= "perl - \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E";
-      } else {
-        $command .= "/bin/sh -c \'python -c " .
-            '"from pkg_resources import get_distribution as get; print \"Using Arvados SDK version\", get(\"arvados-python-client\").version"' .
-            ">&2 2>/dev/null; " .
-            "mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " .
-            "if which stdbuf >/dev/null ; then " .
-            "  exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
-            " else " .
-            "  exec \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
-            " fi\'";
-      }
-    } else {
-      # Non-docker run
-      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
-      $command .= $stdbuf;
-      $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
-    }
-
-    my @execargs = ('bash', '-c', $command);
-    srun (\@srunargs, \@execargs, undef, $build_script);
-    # exec() failed, we assume nothing happened.
-    die "srun() failed on build script\n";
-  }
-  close("writer");
-  if (!defined $childpid)
-  {
-    close $reader{$id};
-    delete $reader{$id};
-    next;
-  }
-  shift @freeslot;
-  $proc{$childpid} = {
-    jobstepidx => $id,
-    time => time,
-    slot => $childslot,
-    jobstepname => "$job_id.$id.$childpid",
-  };
-  croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
-  $slot[$childslot]->{pid} = $childpid;
-
-  Log ($id, "job_task ".$Jobstep->{'arvados_task'}->{'uuid'});
-  Log ($id, "child $childpid started on $childslotname");
-  $Jobstep->{starttime} = time;
-  $Jobstep->{node} = $childnode->{name};
-  $Jobstep->{slotindex} = $childslot;
-  delete $Jobstep->{stderr};
-  delete $Jobstep->{finishtime};
-  delete $Jobstep->{tempfail};
-
-  $Jobstep->{'arvados_task'}->{started_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{starttime});
-  retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-
-  splice @jobstep_todo, $todo_ptr, 1;
-  --$todo_ptr;
-
-  $progress_is_dirty = 1;
-
-  while (!@freeslot
-        ||
-        ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
-  {
-    last THISROUND if $main::please_freeze;
-    if ($main::please_info)
-    {
-      $main::please_info = 0;
-      freeze();
-      create_output_collection();
-      save_meta(1);
-      update_progress_stats();
-    }
-    my $gotsome
-       = readfrompipes ()
-       + reapchildren ();
-    if (!$gotsome || ($latest_refresh + 2 < scalar time))
-    {
-      check_refresh_wanted();
-      check_squeue();
-      update_progress_stats();
-    }
-    elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
-    {
-      update_progress_stats();
-    }
-    if (!$gotsome) {
-      select (undef, undef, undef, 0.1);
-    }
-    $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
-                                        $_->{node}->{hold_count} < 4 } @slot);
-    if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
-       ($thisround_failed_multiple >= 16 && $thisround_failed_multiple > $thisround_succeeded))
-    {
-      my $message = "Repeated failure rate too high ($thisround_failed_multiple/"
-         .($thisround_failed+$thisround_succeeded)
-         .") -- giving up on this round";
-      Log (undef, $message);
-      last THISROUND;
-    }
-
-    # move slots from freeslot to holdslot (or back to freeslot) if necessary
-    for (my $i=$#freeslot; $i>=0; $i--) {
-      if ($slot[$freeslot[$i]]->{node}->{hold_until} > scalar time) {
-       push @holdslot, (splice @freeslot, $i, 1);
-      }
-    }
-    for (my $i=$#holdslot; $i>=0; $i--) {
-      if ($slot[$holdslot[$i]]->{node}->{hold_until} <= scalar time) {
-       push @freeslot, (splice @holdslot, $i, 1);
-      }
-    }
-
-    # give up if no nodes are succeeding
-    if ($working_slot_count < 1) {
-      Log(undef, "Every node has failed -- giving up");
-      last THISROUND;
-    }
-  }
-}
-
-
-push @freeslot, splice @holdslot;
-map { $slot[$freeslot[$_]]->{node}->{losing_streak} = 0 } (0..$#freeslot);
-
-
-Log (undef, "wait for last ".(scalar keys %proc)." children to finish");
-while (%proc)
-{
-  if ($main::please_continue) {
-    $main::please_continue = 0;
-    goto THISROUND;
-  }
-  if ($main::please_info) {
-    $main::please_info = 0;
-    freeze();
-    create_output_collection();
-    save_meta(1);
-  }
-  readfrompipes ();
-  if (!reapchildren())
-  {
-    check_refresh_wanted();
-    check_squeue();
-    update_progress_stats();
-    select (undef, undef, undef, 0.1);
-    killem (keys %proc) if $main::please_freeze;
-  }
-}
-
-update_progress_stats();
-freeze_if_want_freeze();
-
-
-if (!defined $main::success)
-{
-  if (!@jobstep_todo) {
-    $main::success = 1;
-  } elsif ($working_slot_count < 1) {
-    save_output_collection();
-    save_meta();
-    exit_retry_unlocked();
-  } elsif ($thisround_succeeded == 0 &&
-           ($thisround_failed == 0 || $thisround_failed > 4)) {
-    my $message = "stop because $thisround_failed tasks failed and none succeeded";
-    Log (undef, $message);
-    $main::success = 0;
-  }
-}
-
-goto ONELEVEL if !defined $main::success;
-
-
-release_allocation();
-freeze();
-my $collated_output = save_output_collection();
-Log (undef, "finish");
-
-my $final_log = save_meta();
-
-my $final_state;
-if ($collated_output && $final_log && $main::success) {
-  $final_state = 'Complete';
-} else {
-  $final_state = 'Failed';
-}
-$Job->update_attributes('state' => $final_state);
-
-exit (($final_state eq 'Complete') ? 0 : 1);
-
-
-
-sub update_progress_stats
-{
-  $progress_stats_updated = time;
-  return if !$progress_is_dirty;
-  my ($todo, $done, $running) = (scalar @jobstep_todo,
-                                 scalar @jobstep_done,
-                                 scalar keys(%proc));
-  $Job->{'tasks_summary'} ||= {};
-  $Job->{'tasks_summary'}->{'todo'} = $todo;
-  $Job->{'tasks_summary'}->{'done'} = $done;
-  $Job->{'tasks_summary'}->{'running'} = $running;
-  $Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
-  Log (undef, "status: $done done, $running running, $todo todo");
-  $progress_is_dirty = 0;
-}
-
-
-
-sub reapchildren
-{
-  my $children_reaped = 0;
-  my @successful_task_uuids = ();
-
-  while((my $pid = waitpid (-1, WNOHANG)) > 0)
-  {
-    my $childstatus = $?;
-
-    my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
-                    . "."
-                    . $slot[$proc{$pid}->{slot}]->{cpu});
-    my $jobstepidx = $proc{$pid}->{jobstepidx};
-
-    readfrompipes_after_exit ($jobstepidx);
-
-    $children_reaped++;
-    my $elapsed = time - $proc{$pid}->{time};
-    my $Jobstep = $jobstep[$jobstepidx];
-
-    my $exitvalue = $childstatus >> 8;
-    my $exitinfo = "exit ".exit_status_s($childstatus);
-    $Jobstep->{'arvados_task'}->reload;
-    my $task_success = $Jobstep->{'arvados_task'}->{success};
-
-    Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
-
-    if (!defined $task_success) {
-      # task did not indicate one way or the other --> fail
-      Log($jobstepidx, sprintf(
-            "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
-            exit_status_s($childstatus)));
-      $Jobstep->{'arvados_task'}->{success} = 0;
-      retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-      $task_success = 0;
-    }
-
-    if (!$task_success)
-    {
-      my $temporary_fail;
-      $temporary_fail ||= $Jobstep->{tempfail};
-      $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
-
-      ++$thisround_failed;
-      ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
-
-      # Check for signs of a failed or misconfigured node
-      if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
-          2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
-        # Don't count this against jobstep failure thresholds if this
-        # node is already suspected faulty and srun exited quickly
-        if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
-            $elapsed < 5) {
-          Log ($jobstepidx, "blaming failure on suspect node " .
-               $slot[$proc{$pid}->{slot}]->{node}->{name});
-          $temporary_fail ||= 1;
-        }
-        ban_node_by_slot($proc{$pid}->{slot});
-      }
-
-      Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
-                                ++$Jobstep->{'failures'},
-                                $temporary_fail ? 'temporary' : 'permanent',
-                                $elapsed));
-
-      if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
-        # Give up on this task, and the whole job
-        $main::success = 0;
-      }
-      # Put this task back on the todo queue
-      push @jobstep_todo, $jobstepidx;
-      $Job->{'tasks_summary'}->{'failed'}++;
-    }
-    else # task_success
-    {
-      push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
-      ++$thisround_succeeded;
-      $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
-      $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
-      $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
-      push @jobstep_done, $jobstepidx;
-      Log ($jobstepidx, "success in $elapsed seconds");
-    }
-    $Jobstep->{exitcode} = $childstatus;
-    $Jobstep->{finishtime} = time;
-    $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
-    retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-    Log ($jobstepidx, sprintf("task output (%d bytes): %s",
-                              length($Jobstep->{'arvados_task'}->{output}),
-                              $Jobstep->{'arvados_task'}->{output}));
-
-    close $reader{$jobstepidx};
-    delete $reader{$jobstepidx};
-    delete $slot[$proc{$pid}->{slot}]->{pid};
-    push @freeslot, $proc{$pid}->{slot};
-    delete $proc{$pid};
-
-    $progress_is_dirty = 1;
-  }
-
-  if (scalar(@successful_task_uuids) > 0)
-  {
-    Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
-    # Load new tasks
-    my $newtask_list = [];
-    my $newtask_results;
-    do {
-      $newtask_results = api_call(
-        "job_tasks/list",
-        'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
-        'order' => 'qsequence',
-        'offset' => scalar(@$newtask_list),
-          );
-      push(@$newtask_list, @{$newtask_results->{items}});
-    } while (@{$newtask_results->{items}});
-    Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
-    foreach my $arvados_task (@$newtask_list) {
-      my $jobstep = {
-        'level' => $arvados_task->{'sequence'},
-        'failures' => 0,
-        'arvados_task' => $arvados_task
-      };
-      push @jobstep, $jobstep;
-      push @jobstep_todo, $#jobstep;
-    }
-  }
-
-  return $children_reaped;
-}
-
-sub check_refresh_wanted
-{
-  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
-  if (@stat &&
-      $stat[9] > $latest_refresh &&
-      # ...and we have actually locked the job record...
-      $job_id eq $Job->{'uuid'}) {
-    $latest_refresh = scalar time;
-    my $Job2 = api_call("jobs/get", uuid => $jobspec);
-    for my $attr ('cancelled_at',
-                  'cancelled_by_user_uuid',
-                  'cancelled_by_client_uuid',
-                  'state') {
-      $Job->{$attr} = $Job2->{$attr};
-    }
-    if ($Job->{'state'} ne "Running") {
-      if ($Job->{'state'} eq "Cancelled") {
-        Log (undef, "Job cancelled at " . $Job->{'cancelled_at'} . " by user " . $Job->{'cancelled_by_user_uuid'});
-      } else {
-        Log (undef, "Job state unexpectedly changed to " . $Job->{'state'});
-      }
-      $main::success = 0;
-      $main::please_freeze = 1;
-    }
-  }
-}
-
-sub check_squeue
-{
-  my $last_squeue_check = $squeue_checked;
-
-  # Do not call `squeue` or check the kill list more than once every
-  # 15 seconds.
-  return if $last_squeue_check > time - 15;
-  $squeue_checked = time;
-
-  # Look for children from which we haven't received stderr data since
-  # the last squeue check. If no such children exist, all procs are
-  # alive and there's no need to even look at squeue.
-  #
-  # As long as the crunchstat poll interval (10s) is shorter than the
-  # squeue check interval (15s) this should make the squeue check an
-  # infrequent event.
-  my $silent_procs = 0;
-  for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
-  {
-    if (!exists($js->{stderr_at}))
-    {
-      $js->{stderr_at} = 0;
-    }
-    if ($js->{stderr_at} < $last_squeue_check)
-    {
-      $silent_procs++;
-    }
-  }
-  return if $silent_procs == 0;
-
-  # use killem() on procs whose killtime is reached
-  while (my ($pid, $procinfo) = each %proc)
-  {
-    my $js = $jobstep[$procinfo->{jobstepidx}];
-    if (exists $procinfo->{killtime}
-        && $procinfo->{killtime} <= time
-        && $js->{stderr_at} < $last_squeue_check)
-    {
-      my $sincewhen = "";
-      if ($js->{stderr_at}) {
-        $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
-      }
-      Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
-      killem ($pid);
-    }
-  }
-
-  if (!$have_slurm)
-  {
-    # here is an opportunity to check for mysterious problems with local procs
-    return;
-  }
-
-  # Get a list of steps still running.  Note: squeue(1) says --steps
-  # selects a format (which we override anyway) and allows us to
-  # specify which steps we're interested in (which we don't).
-  # Importantly, it also changes the meaning of %j from "job name" to
-  # "step name" and (although this isn't mentioned explicitly in the
-  # docs) switches from "one line per job" mode to "one line per step"
-  # mode. Without it, we'd just get a list of one job, instead of a
-  # list of N steps.
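-  # Editorial note (not from the original script): each output line is a
-  # step name as set via srun --job-name above, i.e. a string of the form
-  # "$job_id.<jobstep index>.<pid>", which is what %ok is keyed on below.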
-  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOB_ID}\E --steps --format='%j' --noheader`;
-  if ($? != 0)
-  {
-    Log(undef, "warning: squeue exit status $? ($!)");
-    return;
-  }
-  chop @squeue;
-
-  # which of my jobsteps are running, according to squeue?
-  my %ok;
-  for my $jobstepname (@squeue)
-  {
-    $ok{$jobstepname} = 1;
-  }
-
-  # Check for child procs >60s old and not mentioned by squeue.
-  while (my ($pid, $procinfo) = each %proc)
-  {
-    if ($procinfo->{time} < time - 60
-        && $procinfo->{jobstepname}
-        && !exists $ok{$procinfo->{jobstepname}}
-        && !exists $procinfo->{killtime})
-    {
-      # According to slurm, this task has ended (successfully or not)
-      # -- but our srun child hasn't exited. First we must wait (30
-      # seconds) in case this is just a race between communication
-      # channels. Then, if our srun child process still hasn't
-      # terminated, we'll conclude some slurm communication
-      # error/delay has caused the task to die without notifying srun,
-      # and we'll kill srun ourselves.
-      $procinfo->{killtime} = time + 30;
-      Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
-    }
-  }
-}
-
-sub check_sinfo
-{
-  # If a node fails in a multi-node "srun" call during job setup, the call
-  # may hang instead of exiting with a nonzero code.  This function checks
-  # "sinfo" for the health of the nodes that were allocated and ensures that
-  # they are all still in the "alloc" state.  If a node that is allocated to
-  # this job is not in "alloc" state, then set please_freeze.
-  #
-  # This is only called from srun_sync() for node configuration.  If a
-  # node fails doing actual work, there are other recovery mechanisms.
-
-  # Do not call `sinfo` more than once every 15 seconds.
-  return if $sinfo_checked > time - 15;
-  $sinfo_checked = time;
-
-  # The output format "%t" tells sinfo to print each node's state.
-  my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
-  if ($? != 0)
-  {
-    Log(undef, "warning: sinfo exit status $? ($!)");
-    return;
-  }
-  chop @sinfo;
-
-  foreach (@sinfo)
-  {
-    if ($_ != "alloc" && $_ != "alloc*") {
-      $main::please_freeze = 1;
-    }
-  }
-}
-
-sub release_allocation
-{
-  if ($have_slurm)
-  {
-    Log (undef, "release job allocation");
-    system "scancel $ENV{SLURM_JOB_ID}";
-  }
-}
-
-
-sub readfrompipes
-{
-  my $gotsome = 0;
-  my %fd_job;
-  my $sel = IO::Select->new();
-  foreach my $jobstepidx (keys %reader)
-  {
-    my $fd = $reader{$jobstepidx};
-    $sel->add($fd);
-    $fd_job{$fd} = $jobstepidx;
-
-    if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
-      $sel->add($stdout_fd);
-      $fd_job{$stdout_fd} = $jobstepidx;
-    }
-  }
-  # select on all reader fds with 0.1s timeout
-  my @ready_fds = $sel->can_read(0.1);
-  foreach my $fd (@ready_fds)
-  {
-    my $buf;
-    if (0 < sysread ($fd, $buf, 65536))
-    {
-      $gotsome = 1;
-      print STDERR $buf if $ENV{CRUNCH_DEBUG};
-
-      my $jobstepidx = $fd_job{$fd};
-      if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
-        $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
-        next;
-      }
-
-      $jobstep[$jobstepidx]->{stderr_at} = time;
-      $jobstep[$jobstepidx]->{stderr} .= $buf;
-
-      # Consume everything up to the last \n
-      preprocess_stderr ($jobstepidx);
-
-      if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
-      {
-        # If we get a lot of stderr without a newline, chop off the
-        # front to avoid letting our buffer grow indefinitely.
-        substr ($jobstep[$jobstepidx]->{stderr},
-                0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
-      }
-    }
-  }
-  return $gotsome;
-}
-
-
-# Consume all full lines of stderr for a jobstep. Everything after the
-# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
-# returning.
-sub preprocess_stderr
-{
-  my $jobstepidx = shift;
-  # slotindex is only defined for children running Arvados job tasks.
-  # Be prepared to handle the undef case (for setup srun calls, etc.).
-  my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
-
-  while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
-    my $line = $1;
-    substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
-    Log ($jobstepidx, "stderr $line");
-    if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/i) {
-      # If the allocation is revoked, we can't possibly continue, so mark all
-      # nodes as failed.  This will cause the overall exit code to be
-      # EX_RETRY_UNLOCKED instead of failure so that crunch_dispatch can re-run
-      # this job.
-      $main::please_freeze = 1;
-      foreach my $st (@slot) {
-        $st->{node}->{fail_count}++;
-      }
-    }
-    elsif ($line =~ /srun: error: .*?\b(Node failure on|Aborting, .*?\bio error\b|cannot communicate with node .* aborting job)/i) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-      if (defined($job_slot_index)) {
-        $slot[$job_slot_index]->{node}->{fail_count}++;
-        ban_node_by_slot($job_slot_index);
-      }
-    }
-    elsif ($line =~ /srun: error: (Unable to create job step|.*?: Communication connection failure)/i) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-      ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
-    }
-    elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-    }
-  }
-}
-
-
-# Read whatever is still available on its stderr+stdout pipes after
-# the given child process has exited.
-sub readfrompipes_after_exit
-{
-  my $jobstepidx = shift;
-
-  # The fact that the child has exited allows some convenient
-  # simplifications: (1) all data must have already been written, so
-  # there's no need to wait for more once sysread returns 0; (2) the
-  # total amount of data available is bounded by the pipe buffer size,
-  # so it's safe to read everything into one string.
-  my $buf;
-  while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) {
-    $jobstep[$jobstepidx]->{stderr_at} = time;
-    $jobstep[$jobstepidx]->{stderr} .= $buf;
-  }
-  if ($jobstep[$jobstepidx]->{stdout_r}) {
-    while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) {
-      $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
-    }
-  }
-  preprocess_stderr ($jobstepidx);
-
-  map {
-    Log ($jobstepidx, "stderr $_");
-  } split ("\n", $jobstep[$jobstepidx]->{stderr});
-  $jobstep[$jobstepidx]->{stderr} = '';
-}
-
-sub fetch_block
-{
-  my $hash = shift;
-  my $keep;
-  if (!open($keep, "-|", "arv-get", "--retries", retry_count(), $hash)) {
-    Log(undef, "fetch_block run error from arv-get $hash: $!");
-    return undef;
-  }
-  my $output_block = "";
-  while (1) {
-    my $buf;
-    my $bytes = sysread($keep, $buf, 1024 * 1024);
-    if (!defined $bytes) {
-      Log(undef, "fetch_block read error from arv-get: $!");
-      $output_block = undef;
-      last;
-    } elsif ($bytes == 0) {
-      # sysread returns 0 at the end of the pipe.
-      last;
-    } else {
-      # some bytes were read into buf.
-      $output_block .= $buf;
-    }
-  }
-  close $keep;
-  if ($?) {
-    Log(undef, "fetch_block arv-get exited " . exit_status_s($?));
-    $output_block = undef;
-  }
-  return $output_block;
-}
-
-# Create a collection by concatenating the output of all tasks (each
-# task's output is either a manifest fragment, a locator for a
-# manifest fragment stored in Keep, or nothing at all). Return the
-# portable_data_hash of the new collection.
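-# Editorial note (not from the original script): per the check inside the
-# loop below, a task output matching /^[0-9a-f]{32}(\+\S+)*$/ is treated as
-# a Keep locator and fetched with arv-get (for example the empty-block
-# locator "d41d8cd98f00b204e9800998ecf8427e+0"); anything else is assumed
-# to be literal manifest text and is written through unchanged.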
-sub create_output_collection
-{
-  Log (undef, "collate");
-
-  my ($child_out, $child_in);
-  # This depends on the python-arvados-python-client package, which needs to be installed
-  # on the machine running crunch-dispatch (typically, the API server).
-  my $pid = open2($child_out, $child_in, '/usr/share/python2.7/dist/python-arvados-python-client/bin/python', '-c', q{
-import arvados
-import sys
-print (arvados.api("v1").collections().
-       create(body={"manifest_text": sys.stdin.read(),
-                    "owner_uuid": sys.argv[2]}).
-       execute(num_retries=int(sys.argv[1]))["portable_data_hash"])
-}, retry_count(), $Job->{owner_uuid});
-
-  my $task_idx = -1;
-  my $manifest_size = 0;
-  for (@jobstep)
-  {
-    ++$task_idx;
-    my $output = $_->{'arvados_task'}->{output};
-    next if (!defined($output));
-    my $next_write;
-    if ($output =~ /^[0-9a-f]{32}(\+\S+)*$/) {
-      $next_write = fetch_block($output);
-    } else {
-      $next_write = $output;
-    }
-    if (defined($next_write)) {
-      if (!defined(syswrite($child_in, $next_write))) {
-        # There's been an error writing.  Stop the loop.
-        # We'll log details about the exit code later.
-        last;
-      } else {
-        $manifest_size += length($next_write);
-      }
-    } else {
-      my $uuid = $_->{'arvados_task'}->{'uuid'};
-      Log (undef, "Error retrieving '$output' output by task $task_idx ($uuid)");
-      $main::success = 0;
-    }
-  }
-  close($child_in);
-  Log(undef, "collated output manifest text to send to API server is $manifest_size bytes with access tokens");
-
-  my $joboutput;
-  my $s = IO::Select->new($child_out);
-  if ($s->can_read(120)) {
-    sysread($child_out, $joboutput, 1024 * 1024);
-    waitpid($pid, 0);
-    if ($?) {
-      Log(undef, "output collection creation exited " . exit_status_s($?));
-      $joboutput = undef;
-    } else {
-      chomp($joboutput);
-    }
-  } else {
-    Log (undef, "timed out while creating output collection");
-    foreach my $signal (2, 2, 2, 15, 15, 9) {
-      kill($signal, $pid);
-      last if waitpid($pid, WNOHANG) == -1;
-      sleep(1);
-    }
-  }
-  close($child_out);
-
-  return $joboutput;
-}
-
-# Calls create_output_collection, logs the result, and returns it.
-# If that was successful, save that as the output in the job record.
-sub save_output_collection {
-  my $collated_output = create_output_collection();
-
-  if (!$collated_output) {
-    Log(undef, "Failed to write output collection");
-  }
-  else {
-    Log(undef, "job output $collated_output");
-    $Job->update_attributes('output' => $collated_output);
-  }
-  return $collated_output;
-}
-
-sub killem
-{
-  foreach (@_)
-  {
-    my $sig = 2;               # SIGINT first
-    if (exists $proc{$_}->{"sent_$sig"} &&
-       time - $proc{$_}->{"sent_$sig"} > 4)
-    {
-      $sig = 15;               # SIGTERM if SIGINT doesn't work
-    }
-    if (exists $proc{$_}->{"sent_$sig"} &&
-       time - $proc{$_}->{"sent_$sig"} > 4)
-    {
-      $sig = 9;                        # SIGKILL if SIGTERM doesn't work
-    }
-    if (!exists $proc{$_}->{"sent_$sig"})
-    {
-      Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
-      kill $sig, $_;
-      select (undef, undef, undef, 0.1);
-      if ($sig == 2)
-      {
-       kill $sig, $_;     # srun wants two SIGINT to really interrupt
-      }
-      $proc{$_}->{"sent_$sig"} = time;
-      $proc{$_}->{"killedafter"} = time - $proc{$_}->{"time"};
-    }
-  }
-}
-
-
-sub fhbits
-{
-  my($bits);
-  for (@_) {
-    vec($bits,fileno($_),1) = 1;
-  }
-  $bits;
-}
-
-
-# Send log output to Keep via arv-put.
-#
-# $log_pipe_in and $log_pipe_out are the input and output filehandles to the arv-put pipe.
-# $log_pipe_out_buf is a string containing all output read from arv-put so far.
-# $log_pipe_out_select is an IO::Select object around $log_pipe_out.
-# $log_pipe_pid is the pid of the arv-put subprocess.
-#
-# The only functions that should access these variables directly are:
-#
-# log_writer_start($logfilename)
-#     Starts an arv-put pipe, reading data on stdin and writing it to
-#     a $logfilename file in an output collection.
-#
-# log_writer_read_output([$timeout])
-#     Read output from $log_pipe_out and append it to $log_pipe_out_buf.
-#     Passes $timeout to the select() call, with a default of 0.01.
-#     Returns the result of the last read() call on $log_pipe_out, or
-#     -1 if read() wasn't called because select() timed out.
-#     Only other log_writer_* functions should need to call this.
-#
-# log_writer_send($txt)
-#     Writes $txt to the output log collection.
-#
-# log_writer_finish()
-#     Closes the arv-put pipe and returns the output that it produces.
-#
-# log_writer_is_active()
-#     Returns a true value if there is currently a live arv-put
-#     process, false otherwise.
-#
-my ($log_pipe_in, $log_pipe_out, $log_pipe_out_buf, $log_pipe_out_select,
-    $log_pipe_pid);
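-# Editorial usage sketch (not from the original script), assembled from the
-# calls made elsewhere in this file; the filename and log line are
-# hypothetical -- start a log stream, append some text, then close it and
-# collect the resulting manifest fragment:
-#
-#   log_writer_start($logfilename);   # e.g. "<job uuid>.log.txt"
-#   log_writer_send("one line of log text\n");
-#   my $log_manifest = log_writer_finish();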
-
-sub log_writer_start($)
-{
-  my $logfilename = shift;
-  $log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
-                        'arv-put',
-                        '--stream',
-                        '--retries', '6',
-                        '--filename', $logfilename,
-                        '-');
-  $log_pipe_out_buf = "";
-  $log_pipe_out_select = IO::Select->new($log_pipe_out);
-}
-
-sub log_writer_read_output {
-  my $timeout = shift || 0.01;
-  my $read = -1;
-  while ($read && $log_pipe_out_select->can_read($timeout)) {
-    $read = read($log_pipe_out, $log_pipe_out_buf, 65536,
-                 length($log_pipe_out_buf));
-  }
-  if (!defined($read)) {
-    Log(undef, "error reading log manifest from arv-put: $!");
-  }
-  return $read;
-}
-
-sub log_writer_send($)
-{
-  my $txt = shift;
-  print $log_pipe_in $txt;
-  log_writer_read_output();
-}
-
-sub log_writer_finish()
-{
-  return unless $log_pipe_pid;
-
-  close($log_pipe_in);
-
-  my $logger_failed = 0;
-  my $read_result = log_writer_read_output(600);
-  if ($read_result == -1) {
-    $logger_failed = -1;
-    Log (undef, "timed out reading from 'arv-put'");
-  } elsif ($read_result != 0) {
-    $logger_failed = -2;
-    Log(undef, "failed to read arv-put log manifest to EOF");
-  }
-
-  waitpid($log_pipe_pid, 0);
-  if ($?) {
-    $logger_failed ||= $?;
-    Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?))
-  }
-
-  close($log_pipe_out);
-  my $arv_put_output = $logger_failed ? undef : $log_pipe_out_buf;
-  $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf =
-      $log_pipe_out_select = undef;
-
-  return $arv_put_output;
-}
-
-sub log_writer_is_active() {
-  return $log_pipe_pid;
-}
-
-sub Log                                # ($jobstepidx, $logmessage)
-{
-  my ($jobstepidx, $logmessage) = @_;
-  if ($logmessage =~ /\n/) {
-    for my $line (split (/\n/, $_[1])) {
-      Log ($jobstepidx, $line);
-    }
-    return;
-  }
-  my $fh = select STDERR; $|=1; select $fh;
-  my $task_qseq = '';
-  if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
-    $task_qseq = $jobstepidx;
-  }
-  my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
-  $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
-  $message .= "\n";
-  my $datetime;
-  if (log_writer_is_active() || -t STDERR) {
-    my @gmtime = gmtime;
-    $datetime = sprintf ("%04d-%02d-%02d_%02d:%02d:%02d",
-                        $gmtime[5]+1900, $gmtime[4]+1, @gmtime[3,2,1,0]);
-  }
-  print STDERR ((-t STDERR) ? ($datetime." ".$message) : $message);
-
-  if (log_writer_is_active()) {
-    log_writer_send($datetime . " " . $message);
-  }
-}
-
-
-sub croak
-{
-  my ($package, $file, $line) = caller;
-  my $message = "@_ at $file line $line\n";
-  Log (undef, $message);
-  release_allocation();
-  freeze() if @jobstep_todo;
-  create_output_collection() if @jobstep_todo;
-  cleanup();
-  save_meta();
-  die;
-}
-
-
-sub cleanup
-{
-  return unless $Job;
-  if ($Job->{'state'} eq 'Cancelled') {
-    $Job->update_attributes('finished_at' => scalar gmtime);
-  } else {
-    $Job->update_attributes('state' => 'Failed');
-  }
-}
-
-
-sub save_meta
-{
-  my $justcheckpoint = shift; # false if this will be the last meta saved
-  return if $justcheckpoint;  # checkpointing is not relevant post-Warehouse.pm
-  return unless log_writer_is_active();
-  my $log_manifest = log_writer_finish();
-  return unless defined($log_manifest);
-
-  if ($Job->{log}) {
-    my $prev_log_coll = api_call("collections/get", uuid => $Job->{log});
-    $log_manifest = $prev_log_coll->{manifest_text} . $log_manifest;
-  }
-
-  my $log_coll = api_call(
-    "collections/create", ensure_unique_name => 1, collection => {
-      manifest_text => $log_manifest,
-      owner_uuid => $Job->{owner_uuid},
-      name => sprintf("Log from %s job %s", $Job->{script}, $Job->{uuid}),
-    });
-  Log(undef, "log collection is " . $log_coll->{portable_data_hash});
-  $Job->update_attributes('log' => $log_coll->{portable_data_hash});
-
-  return $log_coll->{portable_data_hash};
-}
-
-
-sub freeze_if_want_freeze
-{
-  if ($main::please_freeze)
-  {
-    release_allocation();
-    if (@_)
-    {
-      # kill some srun procs before freeze+stop
-      map { $proc{$_} = {} } @_;
-      while (%proc)
-      {
-       killem (keys %proc);
-       select (undef, undef, undef, 0.1);
-       my $died;
-       while (($died = waitpid (-1, WNOHANG)) > 0)
-       {
-         delete $proc{$died};
-       }
-      }
-    }
-    freeze();
-    create_output_collection();
-    cleanup();
-    save_meta();
-    exit 1;
-  }
-}
-
-
-sub freeze
-{
-  Log (undef, "Freeze not implemented");
-  return;
-}
-
-
-sub thaw
-{
-  croak ("Thaw not implemented");
-}
-
-
-sub freezequote
-{
-  my $s = shift;
-  $s =~ s/\\/\\\\/g;
-  $s =~ s/\n/\\n/g;
-  return $s;
-}
-
-
-sub freezeunquote
-{
-  my $s = shift;
-  $s =~ s{\\(.)}{$1 eq "n" ? "\n" : $1}ge;
-  return $s;
-}
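-# Editorial example (not from the original script): freezequote() escapes
-# backslashes and newlines so a value can be stored on a single line, and
-# freezeunquote() reverses it, e.g.
-#   freezequote("a\nb")    returns "a\\nb"  (backslash + "n", 4 characters)
-#   freezeunquote("a\\nb") returns "a\nb"   (a real newline again)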
-
-sub srun_sync
-{
-  my $srunargs = shift;
-  my $execargs = shift;
-  my $opts = shift || {};
-  my $stdin = shift;
-
-  my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
-  Log (undef, "$label: start");
-
-  my ($stderr_r, $stderr_w);
-  pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
-
-  my ($stdout_r, $stdout_w);
-  pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
-
-  my $started_srun = scalar time;
-
-  my $srunpid = fork();
-  if ($srunpid == 0)
-  {
-    close($stderr_r);
-    close($stdout_r);
-    fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
-    fcntl($stdout_w, F_SETFL, 0) or croak($!);
-    open(STDERR, ">&", $stderr_w) or croak ($!);
-    open(STDOUT, ">&", $stdout_w) or croak ($!);
-    srun ($srunargs, $execargs, $opts, $stdin);
-    exit (1);
-  }
-  close($stderr_w);
-  close($stdout_w);
-
-  set_nonblocking($stderr_r);
-  set_nonblocking($stdout_r);
-
-  # Add entries to @jobstep and %proc so check_squeue() and
-  # freeze_if_want_freeze() can treat it like a job task process.
-  push @jobstep, {
-    stderr => '',
-    stderr_at => 0,
-    stderr_captured => '',
-    stdout_r => $stdout_r,
-    stdout_captured => '',
-  };
-  my $jobstepidx = $#jobstep;
-  $proc{$srunpid} = {
-    jobstepidx => $jobstepidx,
-  };
-  $reader{$jobstepidx} = $stderr_r;
-
-  while ($srunpid != waitpid ($srunpid, WNOHANG)) {
-    my $busy = readfrompipes();
-    if (!$busy || ($latest_refresh + 2 < scalar time)) {
-      check_refresh_wanted();
-      check_squeue();
-      check_sinfo();
-    }
-    if (!$busy) {
-      select(undef, undef, undef, 0.1);
-    }
-    if (($started_srun + $srun_sync_timeout) < scalar time) {
-      # Exceeded general timeout for "srun_sync" operations, likely
-      # means something got stuck on the remote node.
-      Log(undef, "srun_sync exceeded timeout, will fail.");
-      $main::please_freeze = 1;
-    }
-    killem(keys %proc) if $main::please_freeze;
-  }
-  my $exited = $?;
-
-  readfrompipes_after_exit ($jobstepidx);
-
-  Log (undef, "$label: exit ".exit_status_s($exited));
-
-  close($stdout_r);
-  close($stderr_r);
-  delete $proc{$srunpid};
-  delete $reader{$jobstepidx};
-
-  my $j = pop @jobstep;
-  # If the srun showed signs of tempfail, ensure the caller treats that as a
-  # failure case.
-  if ($main::please_freeze || $j->{tempfail}) {
-    $exited ||= 255;
-  }
-  return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
-}
-
-
-sub srun
-{
-  my $srunargs = shift;
-  my $execargs = shift;
-  my $opts = shift || {};
-  my $stdin = shift;
-  my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs;
-
-  $Data::Dumper::Terse = 1;
-  $Data::Dumper::Indent = 0;
-  my $show_cmd = Dumper($args);
-  $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g;
-  $show_cmd =~ s/\n/ /g;
-  if ($opts->{fork}) {
-    Log(undef, "starting: $show_cmd");
-  } else {
-    # This is a child process: parent is in charge of reading our
-    # stderr and copying it to Log() if needed.
-    warn "starting: $show_cmd\n";
-  }
-
-  if (defined $stdin) {
-    my $child = open STDIN, "-|";
-    defined $child or die "no fork: $!";
-    if ($child == 0) {
-      print $stdin or die $!;
-      close STDOUT or die $!;
-      exit 0;
-    }
-  }
-
-  return system (@$args) if $opts->{fork};
-
-  exec @$args;
-  warn "ENV size is ".length(join(" ",%ENV));
-  die "exec failed: $!: @$args";
-}
-
-
-sub ban_node_by_slot {
-  # Don't start any new jobsteps on this node for 60 seconds
-  my $slotid = shift;
-  $slot[$slotid]->{node}->{hold_until} = 60 + scalar time;
-  $slot[$slotid]->{node}->{hold_count}++;
-  Log (undef, "backing off node " . $slot[$slotid]->{node}->{name} . " for 60 seconds");
-}
-
-sub must_lock_now
-{
-  my ($lockfile, $error_message) = @_;
-  open L, ">", $lockfile or croak("$lockfile: $!");
-  if (!flock L, LOCK_EX|LOCK_NB) {
-    croak("Can't lock $lockfile: $error_message\n");
-  }
-}
-
-sub find_docker_image {
-  # Given a Keep locator, check to see if it contains a Docker image.
-  # If so, return its stream name and Docker hash.
-  # If not, return undef for both values.
-  my $locator = shift;
-  my ($streamname, $filename);
-  my $image = api_call("collections/get", uuid => $locator);
-  if ($image) {
-    foreach my $line (split(/\n/, $image->{manifest_text})) {
-      my @tokens = split(/\s+/, $line);
-      next if (!@tokens);
-      $streamname = shift(@tokens);
-      foreach my $filedata (grep(/^\d+:\d+:/, @tokens)) {
-        if (defined($filename)) {
-          return (undef, undef);  # More than one file in the Collection.
-        } else {
-          $filename = (split(/:/, $filedata, 3))[2];
-          $filename =~ s/\\([0-3][0-7][0-7])/chr(oct($1))/ge;
-        }
-      }
-    }
-  }
-  if (defined($filename) and ($filename =~ /^((?:sha256:)?[0-9A-Fa-f]{64})\.tar$/)) {
-    return ($streamname, $1);
-  } else {
-    return (undef, undef);
-  }
-}
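-# Editorial note (not from the original function): a collection holding a
-# Docker image is expected to contain exactly one file named <hash>.tar.
-# A hypothetical single-line manifest it would accept looks like
-#   . <md5+size block locator> 0:<size>:<64-hex-digit-hash>.tar
-# which yields (".", "<64-hex-digit-hash>") as (stream name, Docker hash).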
-
-sub exit_retry_unlocked {
-  Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
-  exit(EX_RETRY_UNLOCKED);
-}
-
-sub retry_count {
-  # Calculate the number of times an operation should be retried,
-  # assuming exponential backoff, and that we're willing to retry as
-  # long as tasks have been running.  Enforce a minimum of 3 retries.
-  my ($starttime, $endtime, $timediff, $retries);
-  if (@jobstep) {
-    $starttime = $jobstep[0]->{starttime};
-    $endtime = $jobstep[-1]->{finishtime};
-  }
-  if (!defined($starttime)) {
-    $timediff = 0;
-  } elsif (!defined($endtime)) {
-    $timediff = time - $starttime;
-  } else {
-    $timediff = ($endtime - $starttime) - (time - $endtime);
-  }
-  if ($timediff > 0) {
-    $retries = int(log($timediff) / log(2));
-  } else {
-    $retries = 1;  # Use the minimum.
-  }
-  return ($retries > 3) ? $retries : 3;
-}
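-# Editorial worked example (not from the original script): if tasks have
-# been running for $timediff = 300 seconds, retry_count() returns
-# int(log(300)/log(2)) = 8; if $timediff is only 5 seconds,
-# int(log(5)/log(2)) = 2, so the 3-retry minimum applies and 3 is returned.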
-
-sub retry_op {
-  # Pass in a code reference and a short text description of the operation.
-  # The code reference will be called with the remaining arguments.
-  # If it dies, retry it with exponential backoff until it succeeds,
-  # or until the current retry_count is exhausted.  After each failure
-  # that can be retried, log the current try count (0-based), the next
-  # try time, and the error message.
-  my $operation = shift;
-  my $op_text = shift;
-  my $retries = retry_count();
-  my $retry_callback = sub {
-    my ($try_count, $next_try_at, $errmsg) = @_;
-    $errmsg =~ s/\s*\bat \Q$0\E line \d+\.?\s*//;
-    $errmsg =~ s/\s/ /g;
-    $errmsg =~ s/\s+$//;
-    my $retry_msg;
-    if ($next_try_at < time) {
-      $retry_msg = "Retrying.";
-    } else {
-      my $next_try_fmt = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($next_try_at);
-      $retry_msg = "Retrying at $next_try_fmt.";
-    }
-    Log(undef, "$op_text failed: $errmsg. $retry_msg");
-  };
-  foreach my $try_count (0..$retries) {
-    my $next_try = time + (2 ** $try_count);
-    my $result = eval { $operation->(@_); };
-    if (!$@) {
-      return $result;
-    } elsif ($try_count < $retries) {
-      $retry_callback->($try_count, $next_try, $@);
-      my $sleep_time = $next_try - time;
-      sleep($sleep_time) if ($sleep_time > 0);
-    }
-  }
-  # Ensure the error message ends in a newline, so Perl doesn't add
-  # retry_op's line number to it.
-  chomp($@);
-  die($@ . "\n");
-}
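-# Editorial usage sketch (not from the original script), mirroring calls
-# made elsewhere in this file -- retry an API write with exponential
-# backoff, logging each retriable failure:
-#
-#   retry_op(sub { $Jobstep->{'arvados_task'}->save; },
-#            "job_tasks.update API");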
-
-sub api_call {
-  # Pass in a /-separated API method name, and arguments for it.
-  # This function will call that method, retrying as needed until
-  # the current retry_count is exhausted, logging each failure that
-  # triggers another attempt.
-  my $method_name = shift;
-  my $method = $arv;
-  foreach my $key (split(/\//, $method_name)) {
-    $method = $method->{$key};
-  }
-  return retry_op(sub { $method->execute(@_); }, "API method $method_name", @_);
-}
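-# Editorial usage sketch (not from the original script), mirroring calls
-# made elsewhere in this file:
-#
-#   my $coll  = api_call("collections/get", uuid => $Job->{log});
-#   my $repos = api_call("repositories/list",
-#                        'filters' => [['name', '=', $repo]]);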
-
-sub exit_status_s {
-  # Given a $?, return a human-readable exit code string like "0" or
-  # "1" or "0 with signal 1" or "1 with signal 11".
-  my $exitcode = shift;
-  my $s = $exitcode >> 8;
-  if ($exitcode & 0x7f) {
-    $s .= " with signal " . ($exitcode & 0x7f);
-  }
-  if ($exitcode & 0x80) {
-    $s .= " with core dump";
-  }
-  return $s;
-}
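-# Editorial worked examples (not from the original script):
-#   $? == 256 (child called exit(1))           -> "1"
-#   $? == 139 (killed by SIGSEGV, core dumped) -> "0 with signal 11 with core dump"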
-
-sub handle_readall {
-  # Pass in a glob reference to a file handle.
-  # Read all its contents and return them as a string.
-  my $fh_glob_ref = shift;
-  local $/ = undef;
-  return <$fh_glob_ref>;
-}
-
-sub tar_filename_n {
-  my $n = shift;
-  return sprintf("%s/git.%s.%d.tar", $ENV{CRUNCH_TMP}, $job_id, $n);
-}
-
-sub add_git_archive {
-  # Pass in a git archive command as a string or list, a la system().
-  # This method will save its output to be included in the archive sent to the
-  # build script.
-  my $git_input;
-  $git_tar_count++;
-  if (!open(GIT_ARCHIVE, ">", tar_filename_n($git_tar_count))) {
-    croak("Failed to save git archive: $!");
-  }
-  my $git_pid = open2(">&GIT_ARCHIVE", $git_input, @_);
-  close($git_input);
-  waitpid($git_pid, 0);
-  close(GIT_ARCHIVE);
-  if ($?) {
-    croak("Failed to save git archive: git exited " . exit_status_s($?));
-  }
-}
-
-sub combined_git_archive {
-  # Combine all saved tar archives into a single archive, then return its
-  # contents in a string.  Return undef if no archives have been saved.
-  if ($git_tar_count < 1) {
-    return undef;
-  }
-  my $base_tar_name = tar_filename_n(1);
-  foreach my $tar_to_append (map { tar_filename_n($_); } (2..$git_tar_count)) {
-    my $tar_exit = system("tar", "-Af", $base_tar_name, $tar_to_append);
-    if ($tar_exit != 0) {
-      croak("Error preparing build archive: tar -A exited " .
-            exit_status_s($tar_exit));
-    }
-  }
-  if (!open(GIT_TAR, "<", $base_tar_name)) {
-    croak("Could not open build archive: $!");
-  }
-  my $tar_contents = handle_readall(\*GIT_TAR);
-  close(GIT_TAR);
-  return $tar_contents;
-}
-
-sub set_nonblocking {
-  my $fh = shift;
-  my $flags = fcntl ($fh, F_GETFL, 0) or croak ($!);
-  fcntl ($fh, F_SETFL, $flags | O_NONBLOCK) or croak ($!);
-}
-
-__DATA__
-#!/usr/bin/env perl
-#
-# This is crunch-job's internal dispatch script.  crunch-job running on the API
-# server invokes this script on individual compute nodes, or localhost if we're
-# running a job locally.  It gets called in two modes:
-#
-# * No arguments: Installation mode.  Read a tar archive from the DATA
-#   file handle; it includes the Crunch script's source code, and
-#   maybe SDKs as well.  Those should be installed in the proper
-#   locations.  This runs outside of any Docker container, so don't try to
-#   introspect Crunch's runtime environment.
-#
-# * With arguments: Crunch script run mode.  This script should set up the
-#   environment, then run the command specified in the arguments.  This runs
-#   inside the job's Docker container, if one is being used.
-
-use Fcntl ':flock';
-use File::Path qw( make_path remove_tree );
-use POSIX qw(getcwd);
-
-use constant TASK_TEMPFAIL => 111;
-
-# Map SDK subdirectories to the path environments they belong to.
-my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
-
-my $destdir = $ENV{"CRUNCH_SRC"};
-my $archive_hash = $ENV{"CRUNCH_GIT_ARCHIVE_HASH"};
-my $repo = $ENV{"CRUNCH_SRC_URL"};
-my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
-my $job_work = $ENV{"JOB_WORK"};
-my $task_work = $ENV{"TASK_WORK"};
-
-open(STDOUT_ORIG, ">&", STDOUT);
-open(STDERR_ORIG, ">&", STDERR);
-
-for my $dir ($destdir, $job_work, $task_work) {
-  if ($dir) {
-    make_path $dir;
-    -e $dir or die "Failed to create temporary directory ($dir): $!";
-  }
-}
-
-if ($task_work) {
-  remove_tree($task_work, {keep_root => 1});
-}
-
-### Crunch script run mode
-if (@ARGV) {
-  # We want to do routine logging during task 0 only.  This gives the user
-  # the information they need, but avoids repeating the information for every
-  # task.
-  my $Log;
-  if ($ENV{TASK_SEQUENCE} eq "0") {
-    $Log = sub {
-      my $msg = shift;
-      printf STDERR_ORIG "[Crunch] $msg\n", @_;
-    };
-  } else {
-    $Log = sub { };
-  }
-
-  my $python_src = "$install_dir/python";
-  my $venv_dir = "$job_work/.arvados.venv";
-  my $venv_built = -e "$venv_dir/bin/activate";
-  if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
-    shell_or_die(undef, "virtualenv", "--quiet", "--system-site-packages",
-                 "--python=python2.7", $venv_dir);
-    shell_or_die(TASK_TEMPFAIL, "$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src);
-    $venv_built = 1;
-    $Log->("Built Python SDK virtualenv");
-  }
-
-  my @pysdk_version_cmd = ("python", "-c",
-    "from pkg_resources import get_distribution as get; print get('arvados-python-client').version");
-  if ($venv_built) {
-    $Log->("Running in Python SDK virtualenv");
-    @pysdk_version_cmd = ();
-    my $orig_argv = join(" ", map { quotemeta($_); } @ARGV);
-    @ARGV = ("/bin/sh", "-ec",
-             ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
-  } elsif (-d $python_src) {
-    $Log->("Warning: virtualenv not found inside Docker container default " .
-           "\$PATH. Can't install Python SDK.");
-  }
-
-  if (@pysdk_version_cmd) {
-    open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd);
-    my $pysdk_version = <$pysdk_version_pipe>;
-    close($pysdk_version_pipe);
-    if ($? == 0) {
-      chomp($pysdk_version);
-      $Log->("Using Arvados SDK version $pysdk_version");
-    } else {
-      # A lot could've gone wrong here, but pretty much all of it means that
-      # Python won't be able to load the Arvados SDK.
-      $Log->("Warning: Arvados SDK not found");
-    }
-  }
-
-  while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
-    my $sdk_path = "$install_dir/$sdk_dir";
-    if (-d $sdk_path) {
-      if ($ENV{$sdk_envkey}) {
-        $ENV{$sdk_envkey} = "$sdk_path:" . $ENV{$sdk_envkey};
-      } else {
-        $ENV{$sdk_envkey} = $sdk_path;
-      }
-      $Log->("Arvados SDK added to %s", $sdk_envkey);
-    }
-  }
-
-  exec(@ARGV);
-  die "Cannot exec `@ARGV`: $!";
-}
-
-### Installation mode
-open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
-flock L, LOCK_EX;
-if (readlink ("$destdir.archive_hash") eq $archive_hash && -d $destdir) {
-  # This exact git archive (source + arvados sdk) is already installed
-  # here, so there's no need to reinstall it.
-
-  # We must consume our DATA section, though: otherwise the process
-  # feeding it to us will get SIGPIPE.
-  my $buf;
-  while (read(DATA, $buf, 65536)) { }
-
-  exit(0);
-}
-
-unlink "$destdir.archive_hash";
-mkdir $destdir;
-
-do {
-  # Ignore SIGPIPE: we check retval of close() instead. See perlipc(1).
-  local $SIG{PIPE} = "IGNORE";
-  warn "Extracting archive: $archive_hash\n";
-  # --ignore-zeros is necessary sometimes: depending on how much NUL
-  # padding tar -A put on our combined archive (which in turn depends
-  # on the length of the component archives) tar without
-  # --ignore-zeros will exit before consuming stdin and cause close()
-  # to fail on the resulting SIGPIPE.
-  if (!open(TARX, "|-", "tar", "--ignore-zeros", "-xC", $destdir)) {
-    die "Error launching 'tar -xC $destdir': $!";
-  }
-  # If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
-  # get SIGPIPE.  We must feed it data incrementally.
-  my $tar_input;
-  while (read(DATA, $tar_input, 65536)) {
-    print TARX $tar_input;
-  }
-  if(!close(TARX)) {
-    die "'tar -xC $destdir' exited $?: $!";
-  }
-};
-
-mkdir $install_dir;
-
-my $sdk_root = "$destdir/.arvados.sdk/sdk";
-if (-d $sdk_root) {
-  foreach my $sdk_lang (("python",
-                         map { (split /\//, $_, 2)[0]; } keys(%SDK_ENVVARS))) {
-    if (-d "$sdk_root/$sdk_lang") {
-      if (!rename("$sdk_root/$sdk_lang", "$install_dir/$sdk_lang")) {
-        die "Failed to install $sdk_lang SDK: $!";
-      }
-    }
-  }
-}
-
-my $python_dir = "$install_dir/python";
-if ((-d $python_dir) and can_run("python2.7")) {
-  open(my $egg_info_pipe, "-|",
-       "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
-  my @egg_info_errors = <$egg_info_pipe>;
-  close($egg_info_pipe);
-
-  if ($?) {
-    if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
-      # egg_info apparently failed because it couldn't ask git for a build tag.
-      # Specify no build tag.
-      open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
-      print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
-      close($pysdk_cfg);
-    } else {
-      my $egg_info_exit = $? >> 8;
-      foreach my $errline (@egg_info_errors) {
-        warn $errline;
-      }
-      warn "python setup.py egg_info failed: exit $egg_info_exit";
-      exit ($egg_info_exit || 1);
-    }
-  }
-}
-
-# Hide messages from the install script (unless it fails: shell_or_die
-# will show $destdir.log in that case).
-open(STDOUT, ">>", "$destdir.log") or die ($!);
-open(STDERR, ">&", STDOUT) or die ($!);
-
-if (-e "$destdir/crunch_scripts/install") {
-    shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir);
-} elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
-    # Old version
-    shell_or_die (undef, "./tests/autotests.sh", $install_dir);
-} elsif (-e "./install.sh") {
-    shell_or_die (undef, "./install.sh", $install_dir);
-}
-
-if ($archive_hash) {
-    unlink "$destdir.archive_hash.new";
-    symlink ($archive_hash, "$destdir.archive_hash.new") or die "$destdir.archive_hash.new: $!";
-    rename ("$destdir.archive_hash.new", "$destdir.archive_hash") or die "$destdir.archive_hash: $!";
-}
-
-close L;
-
-sub can_run {
-  my $command_name = shift;
-  open(my $which, "-|", "which", $command_name) or die ($!);
-  while (<$which>) { }
-  close($which);
-  return ($? == 0);
-}
-
-sub shell_or_die
-{
-  my $exitcode = shift;
-
-  if ($ENV{"DEBUG"}) {
-    print STDERR "@_\n";
-  }
-  if (system (@_) != 0) {
-    my $err = $!;
-    my $code = $?;
-    my $exitstatus = sprintf("exit %d signal %d", $code >> 8, $code & 0x7f);
-    open STDERR, ">&STDERR_ORIG";
-    system ("cat $destdir.log >&2");
-    warn "@_ failed ($err): $exitstatus";
-    if (defined($exitcode)) {
-      exit $exitcode;
-    }
-    else {
-      exit (($code >> 8) || 1);
-    }
-  }
-}
-
-__DATA__
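
The installer above streams its appended DATA archive into `tar -x` in 64 KiB chunks, and passes --ignore-zeros so that the NUL padding `tar -A` leaves between concatenated archives does not end extraction early. A minimal Python sketch of the same pattern, assuming `archive` is an open binary stream and `destdir` already exists (those names, like `extract_streamed_tar`, are illustrative rather than taken from the Arvados source):

    import subprocess

    def extract_streamed_tar(archive, destdir):
        # --ignore-zeros keeps tar reading past the zero blocks between
        # concatenated member archives.
        tar = subprocess.Popen(["tar", "--ignore-zeros", "-xC", destdir],
                               stdin=subprocess.PIPE)
        while True:
            chunk = archive.read(65536)  # feed incrementally; one huge write can trigger SIGPIPE
            if not chunk:
                break
            tar.stdin.write(chunk)
        tar.stdin.close()
        if tar.wait() != 0:
            raise RuntimeError("tar -xC %s exited %d" % (destdir, tar.returncode))
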
diff --git a/sdk/cli/test/test_arv-run-pipeline-instance.rb b/sdk/cli/test/test_arv-run-pipeline-instance.rb
deleted file mode 100644 (file)
index b6a0328..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-require 'minitest/autorun'
-
-class TestRunPipelineInstance < Minitest::Test
-  def setup
-  end
-
-  def test_run_pipeline_instance_get_help
-    out, err = capture_subprocess_io do
-      system ('arv-run-pipeline-instance -h')
-    end
-    assert_equal '', err
-  end
-
-  def test_run_pipeline_instance_with_no_such_option
-    out, err = capture_subprocess_io do
-      system ('arv-run-pipeline-instance --junk')
-    end
-    refute_equal '', err
-  end
-
-  def test_run_pipeline_instance_for_bogus_template_uuid
-    out, err = capture_subprocess_io do
-      # fails with error SSL_connect error because HOST_INSECURE is not being used
-         # system ('arv-run-pipeline-instance --template bogus-abcde-fghijklmnopqrs input=c1bad4b39ca5a924e481008009d94e32+210')
-
-      # fails with error: fatal: cannot load such file -- arvados
-         # system ('./bin/arv-run-pipeline-instance --template bogus-abcde-fghijklmnopqrs input=c1bad4b39ca5a924e481008009d94e32+210')
-    end
-    #refute_equal '', err
-    assert_equal '', err
-  end
-
-end
diff --git a/sdk/cli/test/test_crunch-job.rb b/sdk/cli/test/test_crunch-job.rb
deleted file mode 100644 (file)
index 5f111e7..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-require 'minitest/autorun'
-
-class TestCrunchJob < Minitest::Test
-  SPECIAL_EXIT = {
-    EX_RETRY_UNLOCKED: 93,
-    EX_TEMPFAIL: 75,
-  }
-
-  JOBSPEC = {
-    grep_local: {
-      owner_uuid: 'zzzzz-j7d0g-it30l961gq3t0oi',
-      script: 'grep',
-      script_version: 'master',
-      repository: File.absolute_path('../../../..', __FILE__),
-      script_parameters: {foo: 'bar'},
-    },
-  }
-
-  def setup
-  end
-
-  def crunchjob
-    File.absolute_path '../../bin/crunch-job', __FILE__
-  end
-
-  # Return environment suitable for running crunch-job.
-  def crunchenv opts={}
-    env = ENV.to_h
-    env['CRUNCH_REFRESH_TRIGGER'] =
-      File.absolute_path('../../../../tmp/crunch-refresh-trigger', __FILE__)
-    env
-  end
-
-  def jobspec label
-    JOBSPEC[label].dup
-  end
-
-  # Encode job record to json and run it with crunch-job.
-  #
-  # opts[:binstubs] is an array of X where ./binstub_X is added to
-  # PATH in order to mock system programs.
-  def tryjobrecord jobrecord, opts={}
-    env = crunchenv
-    (opts[:binstubs] || []).each do |binstub|
-      env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
-    end
-    system env, crunchjob, '--job', jobrecord.to_json
-  end
-
-  def test_bogus_json
-    out, err = capture_subprocess_io do
-      system crunchenv, crunchjob, '--job', '"}{"'
-    end
-    assert_equal false, $?.success?
-    # Must not conflict with our special exit statuses
-    assert_jobfail $?
-    assert_match /JSON/, err
-  end
-
-  def test_fail_sanity_check
-    out, err = capture_subprocess_io do
-      j = {}
-      tryjobrecord j, binstubs: ['sanity_check']
-    end
-    assert_equal 75, $?.exitstatus
-    assert_match /Sanity check failed: 7/, err
-  end
-
-  def test_fail_docker_sanity_check
-    out, err = capture_subprocess_io do
-      j = {}
-      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
-      tryjobrecord j, binstubs: ['sanity_check']
-    end
-    assert_equal 75, $?.exitstatus
-    assert_match /Sanity check failed: 8/, err
-  end
-
-  def test_no_script_specified
-    out, err = capture_subprocess_io do
-      j = jobspec :grep_local
-      j.delete :script
-      tryjobrecord j
-    end
-    assert_match /No script specified/, err
-    assert_jobfail $?
-  end
-
-  def test_fail_clean_tmp
-    out, err = capture_subprocess_io do
-      j = jobspec :grep_local
-      tryjobrecord j, binstubs: ['clean_fail']
-    end
-    assert_match /Failing mount stub was called/, err
-    assert_match /clean work dirs: exit 44\n.*Transient failure.* exiting 93\n(.*arv_put.*INFO.*\n)?$/, err
-    assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
-  end
-
-  def test_output_collection_owner_uuid
-    skip "Depends on a post 1.3 python-arvados-python-client package being installed"
-
-    j = jobspec :grep_local
-    out, err = capture_subprocess_io do
-      tryjobrecord j, binstubs: ['arv-mount', 'output_coll_owner']
-    end
-    assert_match /owner_uuid: #{j['owner_uuid']}/, err
-  end
-
-  def test_docker_image_missing
-    skip 'API bug: it refuses to create this job in Running state'
-    out, err = capture_subprocess_io do
-      j = jobspec :grep_local
-      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
-      tryjobrecord j, binstubs: ['docker_noop']
-    end
-    assert_match /No Docker image hash found from locator/, err
-    assert_jobfail $?
-  end
-
-  def test_script_version_not_found_in_repository
-    bogus_version = 'f8b72707c1f5f740dbf1ed56eb429a36e0dee770'
-    out, err = capture_subprocess_io do
-      j = jobspec :grep_local
-      j[:script_version] = bogus_version
-      tryjobrecord j, binstubs: ['arv-mount']
-    end
-    assert_match /'#{bogus_version}' not found, giving up/, err
-    assert_jobfail $?
-  end
-
-  # Ensure procstatus is not interpreted as a temporary infrastructure
-  # problem. Would be assert_http_4xx if this were http.
-  def assert_jobfail procstatus
-    refute_includes SPECIAL_EXIT.values, procstatus.exitstatus
-    assert_equal false, procstatus.success?
-  end
-end
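
These tests lean on crunch-job's exit-status convention: 75 (EX_TEMPFAIL) and 93 (EX_RETRY_UNLOCKED) signal infrastructure trouble that warrants a retry, while any other nonzero status is charged to the job itself, which is what assert_jobfail checks. A small Python sketch of that distinction (the helper name is illustrative):

    SPECIAL_EXIT = {"EX_TEMPFAIL": 75, "EX_RETRY_UNLOCKED": 93}

    def is_job_failure(exitstatus):
        # Nonzero statuses other than the special ones mean the job itself
        # failed; the special ones tell the dispatcher to retry or back off.
        return exitstatus != 0 and exitstatus not in SPECIAL_EXIT.values()
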
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 1f8edb70dbe6d0a85df7e090f645efbe8417e3af..4c983858020ba52a8610c3b37e274e2d3643e487 100644 (file)
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Implement cwl-runner interface for submitting and running work on Arvados, using
-# either the Crunch jobs API or Crunch containers API.
+# the Crunch containers API.
 
 from future.utils import viewitems
 from builtins import str
@@ -39,7 +39,6 @@ from .executor import ArvCwlExecutor
 # These aren't used directly in this file but
 # other code expects to import them from here
 from .arvcontainer import ArvadosContainer
-from .arvjob import ArvadosJob
 from .arvtool import ArvadosCommandTool
 from .fsaccess import CollectionFsAccess, CollectionCache, CollectionFetcher
 from .util import get_current_container
@@ -97,32 +96,32 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--enable-reuse", action="store_true",
                         default=True, dest="enable_reuse",
-                        help="Enable job or container reuse (default)")
+                        help="Enable container reuse (default)")
     exgroup.add_argument("--disable-reuse", action="store_false",
                         default=True, dest="enable_reuse",
-                        help="Disable job or container reuse")
+                        help="Disable container reuse")
 
-    parser.add_argument("--project-uuid", metavar="UUID", help="Project that will own the workflow jobs, if not provided, will go to home project.")
+    parser.add_argument("--project-uuid", metavar="UUID", help="Project that will own the workflow containers, if not provided, will go to home project.")
     parser.add_argument("--output-name", help="Name to use for collection that stores the final output.", default=None)
     parser.add_argument("--output-tags", help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
     parser.add_argument("--ignore-docker-for-reuse", action="store_true",
-                        help="Ignore Docker image version when deciding whether to reuse past jobs.",
+                        help="Ignore Docker image version when deciding whether to reuse past containers.",
                         default=False)
 
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--submit", action="store_true", help="Submit workflow to run on Arvados.",
                         default=True, dest="submit")
-    exgroup.add_argument("--local", action="store_false", help="Run workflow on local host (submits jobs to Arvados).",
+    exgroup.add_argument("--local", action="store_false", help="Run workflow on local host (submits containers to Arvados).",
                         default=True, dest="submit")
     exgroup.add_argument("--create-template", action="store_true", help="(Deprecated) synonym for --create-workflow.",
                          dest="create_workflow")
-    exgroup.add_argument("--create-workflow", action="store_true", help="Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.")
-    exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow or pipeline template with the given UUID.")
+    exgroup.add_argument("--create-workflow", action="store_true", help="Register an Arvados workflow that can be run from Workbench")
+    exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow with the given UUID.")
 
     exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner job, wait for completion.",
+    exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner, wait for completion.",
                         default=True, dest="wait")
-    exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner job and exit.",
+    exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner and exit.",
                         default=True, dest="wait")
 
     exgroup = parser.add_mutually_exclusive_group()
@@ -133,8 +132,8 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
 
     parser.add_argument("--api",
                         default=None, dest="work_api",
-                        choices=("jobs", "containers"),
-                        help="Select work submission API.  Default is 'jobs' if that API is available, otherwise 'containers'.")
+                        choices=("containers",),
+                        help="Select work submission API.  Only supports 'containers'")
 
     parser.add_argument("--compute-checksum", action="store_true", default=False,
                         help="Compute checksum of contents while collecting outputs",
@@ -155,10 +154,10 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--submit-request-uuid",
                          default=None,
-                         help="Update and commit to supplied container request instead of creating a new one (containers API only).",
+                         help="Update and commit to supplied container request instead of creating a new one.",
                          metavar="UUID")
     exgroup.add_argument("--submit-runner-cluster",
-                         help="Submit workflow runner to a remote cluster (containers API only)",
+                         help="Submit workflow runner to a remote cluster",
                          default=None,
                          metavar="CLUSTER_ID")
 
@@ -186,7 +185,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         default=0)
 
     parser.add_argument("--priority", type=int,
-                        help="Workflow priority (range 1..1000, higher has precedence over lower, containers api only)",
+                        help="Workflow priority (range 1..1000, higher has precedence over lower)",
                         default=DEFAULT_PRIORITY)
 
     parser.add_argument("--disable-validate", dest="do_validate",
@@ -265,8 +264,6 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
     if arvargs.update_workflow:
         if arvargs.update_workflow.find('-7fd4e-') == 5:
             want_api = 'containers'
-        elif arvargs.update_workflow.find('-p5p6p-') == 5:
-            want_api = 'jobs'
         else:
             want_api = None
         if want_api and arvargs.work_api and want_api != arvargs.work_api:
@@ -300,7 +297,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         return 1
 
     # Note that unless in debug mode, some stack traces related to user
-    # workflow errors may be suppressed. See ArvadosJob.done().
+    # workflow errors may be suppressed.
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
         logging.getLogger('arvados').setLevel(logging.DEBUG)
diff --git a/sdk/cwl/arvados_cwl/arvjob.py b/sdk/cwl/arvados_cwl/arvjob.py
deleted file mode 100644 (file)
index 11efc0c..0000000
+++ /dev/null
@@ -1,495 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from past.builtins import basestring
-from builtins import object
-from future.utils import viewitems
-
-import logging
-import re
-import copy
-import json
-import time
-
-from cwltool.process import shortname, UnsupportedRequirement
-from cwltool.errors import WorkflowException
-from cwltool.command_line_tool import revmap_file, CommandLineTool
-from cwltool.load_tool import fetch_document
-from cwltool.builder import Builder
-from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
-from cwltool.job import JobBase
-
-from schema_salad.sourceline import SourceLine
-
-import arvados_cwl.util
-import ruamel.yaml as yaml
-
-import arvados.collection
-from arvados.errors import ApiError
-
-from .arvdocker import arv_docker_get_image
-from .runner import Runner, arvados_jobs_image, packed_workflow, upload_workflow_collection, trim_anonymous_location, remove_redundant_fields
-from .pathmapper import VwdPathMapper, trim_listing
-from .perf import Perf
-from . import done
-from ._version import __version__
-from .util import get_intermediate_collection_info
-
-logger = logging.getLogger('arvados.cwl-runner')
-metrics = logging.getLogger('arvados.cwl-runner.metrics')
-
-crunchrunner_re = re.compile(r"^.*crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)$")
-
-crunchrunner_git_commit = 'a3f2cb186e437bfce0031b024b2157b73ed2717d'
-
-class ArvadosJob(JobBase):
-    """Submit and manage a Crunch job for executing a CWL CommandLineTool."""
-
-    def __init__(self, runner,
-                 builder,   # type: Builder
-                 joborder,  # type: Dict[Text, Union[Dict[Text, Any], List, Text]]
-                 make_path_mapper,  # type: Callable[..., PathMapper]
-                 requirements,      # type: List[Dict[Text, Text]]
-                 hints,     # type: List[Dict[Text, Text]]
-                 name       # type: Text
-    ):
-        super(ArvadosJob, self).__init__(builder, joborder, make_path_mapper, requirements, hints, name)
-        self.arvrunner = runner
-        self.running = False
-        self.uuid = None
-
-    def run(self, runtimeContext):
-        script_parameters = {
-            "command": self.command_line
-        }
-        runtime_constraints = {}
-
-        with Perf(metrics, "generatefiles %s" % self.name):
-            if self.generatefiles["listing"]:
-                vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
-                                                    keep_client=self.arvrunner.keep_client,
-                                                    num_retries=self.arvrunner.num_retries)
-                script_parameters["task.vwd"] = {}
-                generatemapper = VwdPathMapper(self.generatefiles["listing"], "", "",
-                                               separateDirs=False)
-
-                with Perf(metrics, "createfiles %s" % self.name):
-                    for f, p in generatemapper.items():
-                        if p.type == "CreateFile":
-                            with vwd.open(p.target, "w") as n:
-                                n.write(p.resolved.encode("utf-8"))
-
-                if vwd:
-                    with Perf(metrics, "generatefiles.save_new %s" % self.name):
-                        info = get_intermediate_collection_info(self.name, None, runtimeContext.intermediate_output_ttl)
-                        vwd.save_new(name=info["name"],
-                                     owner_uuid=self.arvrunner.project_uuid,
-                                     ensure_unique_name=True,
-                                     trash_at=info["trash_at"],
-                                     properties=info["properties"])
-
-                for f, p in generatemapper.items():
-                    if p.type == "File":
-                        script_parameters["task.vwd"][p.target] = p.resolved
-                    if p.type == "CreateFile":
-                        script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
-
-        script_parameters["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
-        if self.environment:
-            script_parameters["task.env"].update(self.environment)
-
-        if self.stdin:
-            script_parameters["task.stdin"] = self.stdin
-
-        if self.stdout:
-            script_parameters["task.stdout"] = self.stdout
-
-        if self.stderr:
-            script_parameters["task.stderr"] = self.stderr
-
-        if self.successCodes:
-            script_parameters["task.successCodes"] = self.successCodes
-        if self.temporaryFailCodes:
-            script_parameters["task.temporaryFailCodes"] = self.temporaryFailCodes
-        if self.permanentFailCodes:
-            script_parameters["task.permanentFailCodes"] = self.permanentFailCodes
-
-        with Perf(metrics, "arv_docker_get_image %s" % self.name):
-            (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
-            if docker_req and runtimeContext.use_container is not False:
-                if docker_req.get("dockerOutputDirectory"):
-                    raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
-                        "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
-                runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api,
-                                                                           docker_req,
-                                                                           runtimeContext.pull_image,
-                                                                           self.arvrunner.project_uuid)
-            else:
-                runtime_constraints["docker_image"] = "arvados/jobs"
-
-        resources = self.builder.resources
-        if resources is not None:
-            runtime_constraints["min_cores_per_node"] = resources.get("cores", 1)
-            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
-            runtime_constraints["min_scratch_mb_per_node"] = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)
-
-        runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
-        if runtime_req:
-            if "keep_cache" in runtime_req:
-                runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
-                runtime_constraints["min_ram_mb_per_node"] += runtime_req["keep_cache"]
-            if "outputDirType" in runtime_req:
-                if runtime_req["outputDirType"] == "local_output_dir":
-                    script_parameters["task.keepTmpOutput"] = False
-                elif runtime_req["outputDirType"] == "keep_output_dir":
-                    script_parameters["task.keepTmpOutput"] = True
-
-        filters = [["repository", "=", "arvados"],
-                   ["script", "=", "crunchrunner"],
-                   ["script_version", "in git", crunchrunner_git_commit]]
-        if not self.arvrunner.ignore_docker_for_reuse:
-            filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
-
-        enable_reuse = runtimeContext.enable_reuse
-        if enable_reuse:
-            reuse_req, _ = self.get_requirement("http://arvados.org/cwl#ReuseRequirement")
-            if reuse_req:
-                enable_reuse = reuse_req["enableReuse"]
-
-        self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)
-
-        try:
-            with Perf(metrics, "create %s" % self.name):
-                response = self.arvrunner.api.jobs().create(
-                    body={
-                        "owner_uuid": self.arvrunner.project_uuid,
-                        "script": "crunchrunner",
-                        "repository": "arvados",
-                        "script_version": "master",
-                        "minimum_script_version": crunchrunner_git_commit,
-                        "script_parameters": {"tasks": [script_parameters]},
-                        "runtime_constraints": runtime_constraints
-                    },
-                    filters=filters,
-                    find_or_create=enable_reuse
-                ).execute(num_retries=self.arvrunner.num_retries)
-
-            self.uuid = response["uuid"]
-            self.arvrunner.process_submitted(self)
-
-            self.update_pipeline_component(response)
-
-            if response["state"] == "Complete":
-                logger.info("%s reused job %s", self.arvrunner.label(self), response["uuid"])
-                # Give read permission to the desired project on reused jobs
-                if response["owner_uuid"] != self.arvrunner.project_uuid:
-                    try:
-                        self.arvrunner.api.links().create(body={
-                            'link_class': 'permission',
-                            'name': 'can_read',
-                            'tail_uuid': self.arvrunner.project_uuid,
-                            'head_uuid': response["uuid"],
-                            }).execute(num_retries=self.arvrunner.num_retries)
-                    except ApiError as e:
-                        # The user might not have "manage" access on the job: log
-                        # a message and continue.
-                        logger.info("Creating read permission on job %s: %s",
-                                    response["uuid"],
-                                    e)
-            else:
-                logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception:
-            logger.exception("%s error" % (self.arvrunner.label(self)))
-            self.output_callback({}, "permanentFail")
-
-    def update_pipeline_component(self, record):
-        with self.arvrunner.workflow_eval_lock:
-            if self.arvrunner.pipeline:
-                self.arvrunner.pipeline["components"][self.name] = {"job": record}
-                with Perf(metrics, "update_pipeline_component %s" % self.name):
-                    self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(
-                        uuid=self.arvrunner.pipeline["uuid"],
-                        body={
-                            "components": self.arvrunner.pipeline["components"]
-                        }).execute(num_retries=self.arvrunner.num_retries)
-            if self.arvrunner.uuid:
-                try:
-                    job = self.arvrunner.api.jobs().get(uuid=self.arvrunner.uuid).execute()
-                    if job:
-                        components = job["components"]
-                        components[self.name] = record["uuid"]
-                        self.arvrunner.api.jobs().update(
-                            uuid=self.arvrunner.uuid,
-                            body={
-                                "components": components
-                            }).execute(num_retries=self.arvrunner.num_retries)
-                except Exception:
-                    logger.exception("Error adding to components")
-
-    def done(self, record):
-        try:
-            self.update_pipeline_component(record)
-        except:
-            pass
-
-        try:
-            if record["state"] == "Complete":
-                processStatus = "success"
-                # we don't have the real exit code so fake it.
-                record["exit_code"] = 0
-            else:
-                processStatus = "permanentFail"
-                record["exit_code"] = 1
-
-            outputs = {}
-            try:
-                if record["output"]:
-                    with Perf(metrics, "inspect log %s" % self.name):
-                        logc = arvados.collection.CollectionReader(record["log"],
-                                                                   api_client=self.arvrunner.api,
-                                                                   keep_client=self.arvrunner.keep_client,
-                                                                   num_retries=self.arvrunner.num_retries)
-                        log = logc.open(list(logc.keys())[0])
-                        dirs = {
-                            "tmpdir": "/tmpdir",
-                            "outdir": "/outdir",
-                            "keep": "/keep"
-                        }
-                        for l in log:
-                            # Determine the tmpdir, outdir and keep paths from
-                            # the job run.  Unfortunately, we can't take the first
-                            # values we find (which are expected to be near the
-                            # top) and stop scanning because if the node fails and
-                            # the job restarts on a different node these values
-                            # will differ between runs, and we need to know about the
-                            # final run that actually produced output.
-                            g = crunchrunner_re.match(l)
-                            if g:
-                                dirs[g.group(1)] = g.group(2)
-
-                    if processStatus == "permanentFail":
-                        done.logtail(logc, logger.error, "%s (%s) error log:" % (self.arvrunner.label(self), record["uuid"]), maxlen=40)
-
-                    with Perf(metrics, "output collection %s" % self.name):
-                        outputs = done.done(self, record, dirs["tmpdir"],
-                                            dirs["outdir"], dirs["keep"])
-            except WorkflowException as e:
-                # Only include a stack trace if in debug mode.
-                # This is most likely a user workflow error and a stack trace may obfuscate more useful output.
-                logger.error("%s unable to collect output from %s:\n%s",
-                             self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
-                processStatus = "permanentFail"
-            except Exception:
-                logger.exception("Got unknown exception while collecting output for job %s:", self.name)
-                processStatus = "permanentFail"
-
-            # Note: Currently, on error output_callback is expecting an empty dict,
-            # anything else will fail.
-            if not isinstance(outputs, dict):
-                logger.error("Unexpected output type %s '%s'", type(outputs), outputs)
-                outputs = {}
-                processStatus = "permanentFail"
-        finally:
-            self.output_callback(outputs, processStatus)
-
-
-class RunnerJob(Runner):
-    """Submit and manage a Crunch job that runs crunch_scripts/cwl-runner."""
-
-    def arvados_job_spec(self, debug=False):
-        """Create an Arvados job specification for this workflow.
-
-        The returned dict can be used to create a job (i.e., passed as
-        the +body+ argument to jobs().create()), or as a component in
-        a pipeline template or pipeline instance.
-        """
-
-        if self.embedded_tool.tool["id"].startswith("keep:"):
-            self.job_order["cwl:tool"] = self.embedded_tool.tool["id"][5:]
-        else:
-            packed = packed_workflow(self.arvrunner, self.embedded_tool, self.merged_map)
-            wf_pdh = upload_workflow_collection(self.arvrunner, self.name, packed)
-            self.job_order["cwl:tool"] = "%s/workflow.cwl#main" % wf_pdh
-
-        adjustDirObjs(self.job_order, trim_listing)
-        visit_class(self.job_order, ("File", "Directory"), trim_anonymous_location)
-        visit_class(self.job_order, ("File", "Directory"), remove_redundant_fields)
-
-        if self.output_name:
-            self.job_order["arv:output_name"] = self.output_name
-
-        if self.output_tags:
-            self.job_order["arv:output_tags"] = self.output_tags
-
-        self.job_order["arv:enable_reuse"] = self.enable_reuse
-
-        if self.on_error:
-            self.job_order["arv:on_error"] = self.on_error
-
-        if debug:
-            self.job_order["arv:debug"] = True
-
-        return {
-            "script": "cwl-runner",
-            "script_version": "master",
-            "minimum_script_version": "570509ab4d2ef93d870fd2b1f2eab178afb1bad9",
-            "repository": "arvados",
-            "script_parameters": self.job_order,
-            "runtime_constraints": {
-                "docker_image": arvados_jobs_image(self.arvrunner, self.jobs_image),
-                "min_ram_mb_per_node": self.submit_runner_ram
-            }
-        }
-
-    def run(self, runtimeContext):
-        job_spec = self.arvados_job_spec(runtimeContext.debug)
-
-        job_spec.setdefault("owner_uuid", self.arvrunner.project_uuid)
-
-        job = self.arvrunner.api.jobs().create(
-            body=job_spec,
-            find_or_create=self.enable_reuse
-        ).execute(num_retries=self.arvrunner.num_retries)
-
-        for k,v in viewitems(job_spec["script_parameters"]):
-            if v is False or v is None or isinstance(v, dict):
-                job_spec["script_parameters"][k] = {"value": v}
-
-        del job_spec["owner_uuid"]
-        job_spec["job"] = job
-
-        instance_spec = {
-            "owner_uuid": self.arvrunner.project_uuid,
-            "name": self.name,
-            "components": {
-                "cwl-runner": job_spec,
-            },
-            "state": "RunningOnServer",
-        }
-        if not self.enable_reuse:
-            instance_spec["properties"] = {"run_options": {"enable_job_reuse": False}}
-
-        self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().create(
-            body=instance_spec).execute(num_retries=self.arvrunner.num_retries)
-        logger.info("Created pipeline %s", self.arvrunner.pipeline["uuid"])
-
-        if runtimeContext.wait is False:
-            self.uuid = self.arvrunner.pipeline["uuid"]
-            return
-
-        self.uuid = job["uuid"]
-        self.arvrunner.process_submitted(self)
-
-
-class RunnerTemplate(object):
-    """An Arvados pipeline template that invokes a CWL workflow."""
-
-    type_to_dataclass = {
-        'boolean': 'boolean',
-        'File': 'File',
-        'Directory': 'Collection',
-        'float': 'number',
-        'int': 'number',
-        'string': 'text',
-    }
-
-    def __init__(self, runner, tool, job_order, enable_reuse, uuid,
-                 submit_runner_ram=0, name=None, merged_map=None,
-                 loadingContext=None):
-        self.runner = runner
-        self.embedded_tool = tool
-        self.job = RunnerJob(
-            runner=runner,
-            tool=tool,
-            enable_reuse=enable_reuse,
-            output_name=None,
-            output_tags=None,
-            submit_runner_ram=submit_runner_ram,
-            name=name,
-            merged_map=merged_map,
-            loadingContext=loadingContext)
-        self.job.job_order = job_order
-        self.uuid = uuid
-
-    def pipeline_component_spec(self):
-        """Return a component that Workbench and a-r-p-i will understand.
-
-        Specifically, translate CWL input specs to Arvados pipeline
-        format, like {"dataclass":"File","value":"xyz"}.
-        """
-
-        spec = self.job.arvados_job_spec()
-
-        # Most of the component spec is exactly the same as the job
-        # spec (script, script_version, etc.).
-        # spec['script_parameters'] isn't right, though. A component
-        # spec's script_parameters hash is a translation of
-        # self.tool.tool['inputs'] with defaults/overrides taken from
-        # the job order. So we move the job parameters out of the way
-        # and build a new spec['script_parameters'].
-        job_params = spec['script_parameters']
-        spec['script_parameters'] = {}
-
-        for param in self.embedded_tool.tool['inputs']:
-            param = copy.deepcopy(param)
-
-            # Data type and "required" flag...
-            types = param['type']
-            if not isinstance(types, list):
-                types = [types]
-            param['required'] = 'null' not in types
-            non_null_types = [t for t in types if t != "null"]
-            if len(non_null_types) == 1:
-                the_type = [c for c in non_null_types][0]
-                dataclass = None
-                if isinstance(the_type, basestring):
-                    dataclass = self.type_to_dataclass.get(the_type)
-                if dataclass:
-                    param['dataclass'] = dataclass
-            # Note: If we didn't figure out a single appropriate
-            # dataclass, we just left that attribute out.  We leave
-            # the "type" attribute there in any case, which might help
-            # downstream.
-
-            # Title and description...
-            title = param.pop('label', '')
-            descr = param.pop('doc', '').rstrip('\n')
-            if title:
-                param['title'] = title
-            if descr:
-                param['description'] = descr
-
-            # Fill in the value from the current job order, if any.
-            param_id = shortname(param.pop('id'))
-            value = job_params.get(param_id)
-            if value is None:
-                pass
-            elif not isinstance(value, dict):
-                param['value'] = value
-            elif param.get('dataclass') in ('File', 'Collection') and value.get('location'):
-                param['value'] = value['location'][5:]
-
-            spec['script_parameters'][param_id] = param
-        spec['script_parameters']['cwl:tool'] = job_params['cwl:tool']
-        return spec
-
-    def save(self):
-        body = {
-            "components": {
-                self.job.name: self.pipeline_component_spec(),
-            },
-            "name": self.job.name,
-        }
-        if self.runner.project_uuid:
-            body["owner_uuid"] = self.runner.project_uuid
-        if self.uuid:
-            self.runner.api.pipeline_templates().update(
-                uuid=self.uuid, body=body).execute(
-                    num_retries=self.runner.num_retries)
-            logger.info("Updated template %s", self.uuid)
-        else:
-            self.uuid = self.runner.api.pipeline_templates().create(
-                body=body, ensure_unique_name=True).execute(
-                    num_retries=self.runner.num_retries)['uuid']
-            logger.info("Created template %s", self.uuid)
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index 4fc02a0166455c13f6853cbce714fd6d7f2d90f4..704edaccb903eb83f1e66c983eb007fe1c4f8711 100644 (file)
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from cwltool.command_line_tool import CommandLineTool, ExpressionTool
-from .arvjob import ArvadosJob
 from .arvcontainer import ArvadosContainer
 from .pathmapper import ArvPathMapper
 from .runner import make_builder
@@ -48,8 +47,6 @@ class ArvadosCommandTool(CommandLineTool):
     def make_job_runner(self, runtimeContext):
         if runtimeContext.work_api == "containers":
             return partial(ArvadosContainer, self.arvrunner, runtimeContext)
-        elif runtimeContext.work_api == "jobs":
-            return partial(ArvadosJob, self.arvrunner)
         else:
             raise Exception("Unsupported work_api %s", runtimeContext.work_api)
 
@@ -58,10 +55,6 @@ class ArvadosCommandTool(CommandLineTool):
             return ArvPathMapper(self.arvrunner, reffiles+runtimeContext.extra_reffiles, runtimeContext.basedir,
                                  "/keep/%s",
                                  "/keep/%s/%s")
-        elif runtimeContext.work_api == "jobs":
-            return ArvPathMapper(self.arvrunner, reffiles, runtimeContext.basedir,
-                                 "$(task.keep)/%s",
-                                 "$(task.keep)/%s/%s")
 
     def job(self, joborder, output_callback, runtimeContext):
         builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
@@ -75,11 +68,6 @@ class ArvadosCommandTool(CommandLineTool):
             else:
                 runtimeContext.outdir = "/var/spool/cwl"
                 runtimeContext.docker_outdir = "/var/spool/cwl"
-        elif runtimeContext.work_api == "jobs":
-            runtimeContext.outdir = "$(task.outdir)"
-            runtimeContext.docker_outdir = "$(task.outdir)"
-            runtimeContext.tmpdir = "$(task.tmpdir)"
-            runtimeContext.docker_tmpdir = "$(task.tmpdir)"
         return super(ArvadosCommandTool, self).job(joborder, output_callback, runtimeContext)
 
 class ArvadosExpressionTool(ExpressionTool):
diff --git a/sdk/cwl/arvados_cwl/crunch_script.py b/sdk/cwl/arvados_cwl/crunch_script.py
deleted file mode 100644 (file)
index c886550..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Crunch script integration for running arvados-cwl-runner (importing
-# arvados_cwl module) inside a crunch job.
-#
-# This gets the job record, transforms the script parameters into a valid CWL
-# input object, then executes the CWL runner to run the underlying workflow or
-# tool.  When the workflow completes, record the output object in an output
-# collection for this runner job.
-
-from past.builtins import basestring
-from future.utils import viewitems
-
-import arvados
-import arvados_cwl
-import arvados.collection
-import arvados.util
-import cwltool.main
-import logging
-import os
-import json
-import argparse
-import re
-import functools
-
-from arvados.api import OrderedJsonModel
-from cwltool.process import shortname
-from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, normalizeFilesDirs
-from cwltool.load_tool import load_tool
-from cwltool.errors import WorkflowException
-from arvados_cwl.context import ArvRuntimeContext
-
-from .fsaccess import CollectionFetcher, CollectionFsAccess
-
-logger = logging.getLogger('arvados.cwl-runner')
-
-def run():
-    # Timestamps are added by crunch-job, so don't print redundant timestamps.
-    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
-
-    # Print package versions
-    logger.info(arvados_cwl.versionstring())
-
-    api = arvados.api("v1")
-
-    arvados_cwl.add_arv_hints()
-
-    runner = None
-    try:
-        job_order_object = arvados.current_job()['script_parameters']
-        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))
-
-        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')
-
-        def keeppath(v):
-            if pdh_path.match(v):
-                return "keep:%s" % v
-            else:
-                return v
-
-        def keeppathObj(v):
-            if "location" in v:
-                v["location"] = keeppath(v["location"])
-
-        for k,v in viewitems(job_order_object):
-            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
-                job_order_object[k] = {
-                    "class": "File",
-                    "location": "keep:%s" % v
-                }
-
-        adjustFileObjs(job_order_object, keeppathObj)
-        adjustDirObjs(job_order_object, keeppathObj)
-        normalizeFilesDirs(job_order_object)
-
-        output_name = None
-        output_tags = None
-        enable_reuse = True
-        on_error = "continue"
-        debug = False
-
-        if "arv:output_name" in job_order_object:
-            output_name = job_order_object["arv:output_name"]
-            del job_order_object["arv:output_name"]
-
-        if "arv:output_tags" in job_order_object:
-            output_tags = job_order_object["arv:output_tags"]
-            del job_order_object["arv:output_tags"]
-
-        if "arv:enable_reuse" in job_order_object:
-            enable_reuse = job_order_object["arv:enable_reuse"]
-            del job_order_object["arv:enable_reuse"]
-
-        if "arv:on_error" in job_order_object:
-            on_error = job_order_object["arv:on_error"]
-            del job_order_object["arv:on_error"]
-
-        if "arv:debug" in job_order_object:
-            debug = job_order_object["arv:debug"]
-            del job_order_object["arv:debug"]
-
-        arvargs = argparse.Namespace()
-        arvargs.work_api = "jobs"
-        arvargs.output_name = output_name
-        arvargs.output_tags = output_tags
-        arvargs.thread_count = 1
-        arvargs.collection_cache_size = None
-
-        runner = arvados_cwl.ArvCwlExecutor(api_client=arvados.safeapi.ThreadSafeApiCache(
-            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
-                                          arvargs=arvargs)
-
-        make_fs_access = functools.partial(CollectionFsAccess,
-                                 collection_cache=runner.collection_cache)
-
-        t = load_tool(toolpath, runner.loadingContext)
-
-        if debug:
-            logger.setLevel(logging.DEBUG)
-            logging.getLogger('arvados').setLevel(logging.DEBUG)
-            logging.getLogger("cwltool").setLevel(logging.DEBUG)
-
-        args = ArvRuntimeContext(vars(arvargs))
-        args.project_uuid = arvados.current_job()["owner_uuid"]
-        args.enable_reuse = enable_reuse
-        args.on_error = on_error
-        args.submit = False
-        args.debug = debug
-        args.quiet = False
-        args.ignore_docker_for_reuse = False
-        args.basedir = os.getcwd()
-        args.name = None
-        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
-        args.make_fs_access = make_fs_access
-        args.trash_intermediate = False
-        args.intermediate_output_ttl = 0
-        args.priority = arvados_cwl.DEFAULT_PRIORITY
-        args.do_validate = True
-        args.disable_js_validation = False
-        args.tmp_outdir_prefix = "tmp"
-
-        runner.arv_executor(t, job_order_object, args, logger=logger)
-    except Exception as e:
-        if isinstance(e, WorkflowException):
-            logging.info("Workflow error %s", e)
-        else:
-            logging.exception("Unhandled exception")
-        if runner and runner.final_output_collection:
-            outputCollection = runner.final_output_collection.portable_data_hash()
-        else:
-            outputCollection = None
-        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                             body={
-                                                 'output': outputCollection,
-                                                 'success': False,
-                                                 'progress':1.0
-                                             }).execute()
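
The crunch script above rewrote script_parameters whose values looked like Keep locators into CWL File objects before handing the job order to the runner. Roughly, assuming a bare portable data hash (optionally followed by a path) identifies a collection; the helper name below is illustrative, while the regex mirrors the pdh_path pattern used above (the top-level check in the real code used arvados.util.keep_locator_pattern):

    import re

    # portable data hash, optionally followed by a path inside the collection
    PDH_PATH = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

    def to_cwl_file(value):
        if isinstance(value, str) and PDH_PATH.match(value):
            return {"class": "File", "location": "keep:%s" % value}
        return value
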
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 9a94095ae8648d7107cfd1d4025098c117866802..eed2fe19df6a3f78a4a1f0ee40d26ccbf50f3349 100644 (file)
@@ -31,7 +31,6 @@ from arvados.errors import ApiError
 
 import arvados_cwl.util
 from .arvcontainer import RunnerContainer
-from .arvjob import RunnerJob, RunnerTemplate
 from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
 from .arvtool import ArvadosCommandTool, validate_cluster_target, ArvadosExpressionTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
@@ -91,8 +90,8 @@ class RuntimeStatusLoggingHandler(logging.Handler):
 
 
 class ArvCwlExecutor(object):
-    """Execute a CWL tool or workflow, submit work (using either jobs or
-    containers API), wait for them to complete, and report output.
+    """Execute a CWL tool or workflow, submit work (using containers API),
+    wait for them to complete, and report output.
 
     """
 
@@ -154,7 +153,7 @@ class ArvCwlExecutor(object):
                                            num_retries=self.num_retries)
 
         self.work_api = None
-        expected_api = ["containers", "jobs"]
+        expected_api = ["containers"]
         for api in expected_api:
             try:
                 methods = self.api._rootDesc.get('resources')[api]['methods']
@@ -172,19 +171,11 @@ class ArvCwlExecutor(object):
                 raise Exception("Unsupported API '%s', expected one of %s" % (arvargs.work_api, expected_api))
 
         if self.work_api == "jobs":
-            logger.warning("""
+            logger.error("""
 *******************************
-Using the deprecated 'jobs' API.
-
-To get rid of this warning:
-
-Users: read about migrating at
-http://doc.arvados.org/user/cwl/cwl-style.html#migrate
-and use the option --api=containers
-
-Admins: configure the cluster to disable the 'jobs' API as described at:
-http://doc.arvados.org/install/install-api-server.html#disable_api_methods
+The 'jobs' API is no longer supported.
 *******************************""")
+            exit(1)
 
         self.loadingContext = ArvLoadingContext(vars(arvargs))
         self.loadingContext.fetcher_constructor = self.fetcher_constructor
@@ -339,7 +330,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         return "[%s %s]" % (self.work_api[0:-1], obj.name)
 
     def poll_states(self):
-        """Poll status of jobs or containers listed in the processes dict.
+        """Poll status of containers listed in the processes dict.
 
         Runs in a separate thread.
         """
@@ -360,8 +351,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                 begin_poll = time.time()
                 if self.work_api == "containers":
                     table = self.poll_api.container_requests()
-                elif self.work_api == "jobs":
-                    table = self.poll_api.jobs()
 
                 pageSize = self.poll_api._rootDesc.get('maxItemsPerResponse', 1000)
 
@@ -410,18 +399,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
 
     def check_features(self, obj, parentfield=""):
         if isinstance(obj, dict):
-            if obj.get("writable") and self.work_api != "containers":
-                raise SourceLine(obj, "writable", UnsupportedRequirement).makeError("InitialWorkDir feature 'writable: true' not supported with --api=jobs")
             if obj.get("class") == "DockerRequirement":
                 if obj.get("dockerOutputDirectory"):
-                    if self.work_api != "containers":
-                        raise SourceLine(obj, "dockerOutputDirectory", UnsupportedRequirement).makeError(
-                            "Option 'dockerOutputDirectory' of DockerRequirement not supported with --api=jobs.")
                     if not obj.get("dockerOutputDirectory").startswith('/'):
                         raise SourceLine(obj, "dockerOutputDirectory", validate.ValidationException).makeError(
                             "Option 'dockerOutputDirectory' must be an absolute path.")
-            if obj.get("class") == "http://commonwl.org/cwltool#Secrets" and self.work_api != "containers":
-                raise SourceLine(obj, "class", UnsupportedRequirement).makeError("Secrets not supported with --api=jobs")
             if obj.get("class") == "InplaceUpdateRequirement":
                 if obj["inplaceUpdate"] and parentfield == "requirements":
                     raise SourceLine(obj, "class", UnsupportedRequirement).makeError("InplaceUpdateRequirement not supported for keep collections.")
@@ -522,13 +504,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             except Exception:
                 logger.exception("Setting container output")
                 return
-        elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
-            self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
-                                   body={
-                                       'output': self.final_output_collection.portable_data_hash(),
-                                       'success': self.final_status == "success",
-                                       'progress':1.0
-                                   }).execute(num_retries=self.num_retries)
 
     def apply_reqs(self, job_order_object, tool):
         if "https://w3id.org/cwl/cwl#requirements" in job_order_object:
@@ -604,18 +579,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         existing_uuid = runtimeContext.update_workflow
         if existing_uuid or runtimeContext.create_workflow:
             # Create a pipeline template or workflow record and exit.
-            if self.work_api == "jobs":
-                tmpl = RunnerTemplate(self, tool, job_order,
-                                      runtimeContext.enable_reuse,
-                                      uuid=existing_uuid,
-                                      submit_runner_ram=runtimeContext.submit_runner_ram,
-                                      name=runtimeContext.name,
-                                      merged_map=merged_map,
-                                      loadingContext=loadingContext)
-                tmpl.save()
-                # cwltool.main will write our return value to stdout.
-                return (tmpl.uuid, "success")
-            elif self.work_api == "containers":
+            if self.work_api == "containers":
                 return (upload_workflow(self, tool, job_order,
                                         self.project_uuid,
                                         uuid=existing_uuid,
@@ -641,12 +605,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             runtimeContext.docker_outdir = "/var/spool/cwl"
             runtimeContext.tmpdir = "/tmp"
             runtimeContext.docker_tmpdir = "/tmp"
-        elif self.work_api == "jobs":
-            if runtimeContext.priority != DEFAULT_PRIORITY:
-                raise Exception("--priority not implemented for jobs API.")
-            runtimeContext.outdir = "$(task.outdir)"
-            runtimeContext.docker_outdir = "$(task.outdir)"
-            runtimeContext.tmpdir = "$(task.tmpdir)"
 
         if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
             raise Exception("--priority must be in the range 1..1000.")
@@ -686,24 +644,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                                 secret_store=self.secret_store,
                                                 collection_cache_size=runtimeContext.collection_cache_size,
                                                 collection_cache_is_default=self.should_estimate_cache_size)
-            elif self.work_api == "jobs":
-                tool = RunnerJob(self, tool, loadingContext, runtimeContext.enable_reuse,
-                                      self.output_name,
-                                      self.output_tags,
-                                      submit_runner_ram=runtimeContext.submit_runner_ram,
-                                      name=runtimeContext.name,
-                                      on_error=runtimeContext.on_error,
-                                      submit_runner_image=runtimeContext.submit_runner_image,
-                                      merged_map=merged_map)
-        elif runtimeContext.cwl_runner_job is None and self.work_api == "jobs":
-            # Create pipeline for local run
-            self.pipeline = self.api.pipeline_instances().create(
-                body={
-                    "owner_uuid": self.project_uuid,
-                    "name": runtimeContext.name if runtimeContext.name else shortname(tool.tool["id"]),
-                    "components": {},
-                    "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
-            logger.info("Pipeline instance %s", self.pipeline["uuid"])
 
         if runtimeContext.cwl_runner_job is not None:
             self.uuid = runtimeContext.cwl_runner_job.get('uuid')
index c22ece38b62259a12d8f05842b2cf2ce0a2b02fb..3374e1c13f8004100c2f3c114edbfba2db26dec6 100644 (file)
@@ -88,7 +88,6 @@ class TestContainer(unittest.TestCase):
     def setup_and_test_container_executor_and_logging(self, gcc_mock) :
         api = mock.MagicMock()
         api._rootDesc = copy.deepcopy(get_rootDesc())
-        del api._rootDesc.get('resources')['jobs']['methods']['create']
 
         # Make sure ArvCwlExecutor thinks it's running inside a container so it
         # adds the logging handler that will call runtime_status_update() mock
@@ -1071,6 +1070,5 @@ class TestWorkflow(unittest.TestCase):
 
         api = mock.MagicMock()
         api._rootDesc = copy.deepcopy(get_rootDesc())
-        del api._rootDesc.get('resources')['jobs']['methods']['create']
         runner = arvados_cwl.executor.ArvCwlExecutor(api)
         self.assertEqual(runner.work_api, 'containers')
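
[Editor's note] The container tests no longer delete jobs.create from the mocked discovery document; with an unmodified root description the executor is expected to default to the containers API. A condensed sketch of that pattern, using the same imports as these test modules (get_rootDesc is the test helper shipped with the arvados-cwl-runner test suite):

    import copy
    import mock
    import arvados_cwl.executor
    from .mock_discovery import get_rootDesc  # test-suite helper

    def default_work_api_is_containers():
        api = mock.MagicMock()
        api._rootDesc = copy.deepcopy(get_rootDesc())
        runner = arvados_cwl.executor.ArvCwlExecutor(api)
        return runner.work_api == "containers"
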
diff --git a/sdk/cwl/tests/test_job.py b/sdk/cwl/tests/test_job.py
deleted file mode 100644 (file)
index f08e14f..0000000
+++ /dev/null
@@ -1,554 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from future import standard_library
-standard_library.install_aliases()
-from builtins import str
-from builtins import next
-
-import functools
-import json
-import logging
-import mock
-import os
-import unittest
-import copy
-import io
-import argparse
-
-import arvados
-import arvados_cwl
-import arvados_cwl.executor
-import cwltool.process
-from arvados.errors import ApiError
-from schema_salad.ref_resolver import Loader
-from schema_salad.sourceline import cmap
-from .mock_discovery import get_rootDesc
-from .matcher import JsonDiffMatcher, StripYAMLComments
-from .test_container import CollectionMock
-from arvados_cwl.arvdocker import arv_docker_clear_cache
-
-if not os.getenv('ARVADOS_DEBUG'):
-    logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
-    logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
-
-class TestJob(unittest.TestCase):
-
-    def setUp(self):
-        cwltool.process._names = set()
-
-    def helper(self, runner, enable_reuse=True):
-        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
-
-        make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
-                                         collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
-        loadingContext = arvados_cwl.context.ArvLoadingContext(
-            {"avsc_names": avsc_names,
-             "basedir": "",
-             "make_fs_access": make_fs_access,
-             "loader": Loader({}),
-             "metadata": {"cwlVersion": "v1.1", "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"},
-             "makeTool": runner.arv_make_tool})
-        runtimeContext = arvados_cwl.context.ArvRuntimeContext(
-            {"work_api": "jobs",
-             "basedir": "",
-             "name": "test_run_job_"+str(enable_reuse),
-             "make_fs_access": make_fs_access,
-             "enable_reuse": enable_reuse,
-             "priority": 500})
-
-        return loadingContext, runtimeContext
-
-    # The test passes no builder.resources
-    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
-    @mock.patch('arvados.commands.keepdocker.list_images_in_arv')
-    def test_run(self, list_images_in_arv):
-        for enable_reuse in (True, False):
-            arv_docker_clear_cache()
-            runner = mock.MagicMock()
-            runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-            runner.ignore_docker_for_reuse = False
-            runner.num_retries = 0
-
-            list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
-            runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
-            # Simulate reused job from another project so that we can check that a can_read
-            # link is added.
-            runner.api.jobs().create().execute.return_value = {
-                'state': 'Complete' if enable_reuse else 'Queued',
-                'owner_uuid': 'zzzzz-tpzed-yyyyyyyyyyyyyyy' if enable_reuse else 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                'uuid': 'zzzzz-819sb-yyyyyyyyyyyyyyy',
-                'output': None,
-            }
-
-            tool = cmap({
-                "inputs": [],
-                "outputs": [],
-                "baseCommand": "ls",
-                "arguments": [{"valueFrom": "$(runtime.outdir)"}],
-                "id": "#",
-                "class": "CommandLineTool"
-            })
-
-            loadingContext, runtimeContext = self.helper(runner, enable_reuse)
-
-            arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
-            arvtool.formatgraph = None
-            for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
-                j.run(runtimeContext)
-                runner.api.jobs().create.assert_called_with(
-                    body=JsonDiffMatcher({
-                        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                        'runtime_constraints': {},
-                        'script_parameters': {
-                            'tasks': [{
-                                'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
-                                'command': ['ls', '$(task.outdir)']
-                            }],
-                        },
-                        'script_version': 'master',
-                        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
-                        'repository': 'arvados',
-                        'script': 'crunchrunner',
-                        'runtime_constraints': {
-                            'docker_image': 'arvados/jobs',
-                            'min_cores_per_node': 1,
-                            'min_ram_mb_per_node': 1024,
-                            'min_scratch_mb_per_node': 2048 # tmpdirSize + outdirSize
-                        }
-                    }),
-                    find_or_create=enable_reuse,
-                    filters=[['repository', '=', 'arvados'],
-                             ['script', '=', 'crunchrunner'],
-                             ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
-                             ['docker_image_locator', 'in docker', 'arvados/jobs']]
-                )
-                if enable_reuse:
-                    runner.api.links().create.assert_called_with(
-                        body=JsonDiffMatcher({
-                            'link_class': 'permission',
-                            'name': 'can_read',
-                            "tail_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
-                            "head_uuid": "zzzzz-819sb-yyyyyyyyyyyyyyy",
-                        })
-                    )
-                    # Simulate an API exception when trying to create a
-                    # sharing link on the job
-                    runner.api.links().create.side_effect = ApiError(
-                        mock.MagicMock(return_value={'status': 403}),
-                        bytes(b'Permission denied'))
-                    j.run(runtimeContext)
-                else:
-                    assert not runner.api.links().create.called
-
-    # The test passes some fields in builder.resources
-    # For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
-    @mock.patch('arvados.commands.keepdocker.list_images_in_arv')
-    def test_resource_requirements(self, list_images_in_arv):
-        runner = mock.MagicMock()
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.ignore_docker_for_reuse = False
-        runner.num_retries = 0
-        arvados_cwl.add_arv_hints()
-
-        list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
-        runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
-
-        tool = {
-            "inputs": [],
-            "outputs": [],
-            "hints": [{
-                "class": "ResourceRequirement",
-                "coresMin": 3,
-                "ramMin": 3000,
-                "tmpdirMin": 4000
-            }, {
-                "class": "http://arvados.org/cwl#RuntimeConstraints",
-                "keep_cache": 512,
-                "outputDirType": "keep_output_dir"
-            }, {
-                "class": "http://arvados.org/cwl#APIRequirement",
-            },
-            {
-                "class": "http://arvados.org/cwl#ReuseRequirement",
-                "enableReuse": False
-            }],
-            "baseCommand": "ls",
-            "id": "#",
-            "class": "CommandLineTool"
-        }
-
-        loadingContext, runtimeContext = self.helper(runner)
-
-        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
-        arvtool.formatgraph = None
-        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
-            j.run(runtimeContext)
-        runner.api.jobs().create.assert_called_with(
-            body=JsonDiffMatcher({
-                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                'runtime_constraints': {},
-                'script_parameters': {
-                    'tasks': [{
-                        'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
-                        'task.keepTmpOutput': True,
-                        'command': ['ls']
-                    }]
-            },
-            'script_version': 'master',
-                'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
-                'repository': 'arvados',
-                'script': 'crunchrunner',
-                'runtime_constraints': {
-                    'docker_image': 'arvados/jobs',
-                    'min_cores_per_node': 3,
-                    'min_ram_mb_per_node': 3512,     # ramMin + keep_cache
-                    'min_scratch_mb_per_node': 5024, # tmpdirSize + outdirSize
-                    'keep_cache_mb_per_task': 512
-                }
-            }),
-            find_or_create=False,
-            filters=[['repository', '=', 'arvados'],
-                     ['script', '=', 'crunchrunner'],
-                     ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
-                     ['docker_image_locator', 'in docker', 'arvados/jobs']])
-
-    @mock.patch("arvados.collection.CollectionReader")
-    def test_done(self, reader):
-        api = mock.MagicMock()
-
-        runner = mock.MagicMock()
-        runner.api = api
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.num_retries = 0
-        runner.ignore_docker_for_reuse = False
-
-        reader().keys.return_value = "log.txt"
-        reader().open.return_value = io.StringIO(
-            str(u"""2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
-2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.outdir)=/tmp/crunch-job-task-work/compute3.1/outdir
-2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
-        """))
-        api.collections().list().execute.side_effect = ({"items": []},
-                                                        {"items": [{"manifest_text": "XYZ"}]},
-                                                        {"items": []},
-                                                        {"items": [{"manifest_text": "ABC"}]})
-
-        arvjob = arvados_cwl.ArvadosJob(runner,
-                                        mock.MagicMock(),
-                                        {},
-                                        None,
-                                        [],
-                                        [],
-                                        "testjob")
-        arvjob.output_callback = mock.MagicMock()
-        arvjob.collect_outputs = mock.MagicMock()
-        arvjob.collect_outputs.return_value = {"out": "stuff"}
-
-        arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        })
-
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            # Output collection check
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                          ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                          ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0),
-            mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999993+99']],
-                 select=['manifest_text']),
-            mock.call().execute(num_retries=0),
-            # Log collection's turn
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                          ['portable_data_hash', '=', '99999999999999999999999999999994+99'],
-                          ['name', '=', 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz']]),
-            mock.call().execute(num_retries=0),
-            mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999994+99']],
-                 select=['manifest_text']),
-            mock.call().execute(num_retries=0)])
-
-        api.collections().create.assert_has_calls([
-            mock.call(ensure_unique_name=True,
-                      body={'portable_data_hash': '99999999999999999999999999999993+99',
-                            'manifest_text': 'XYZ',
-                            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                            'name': 'Output 9999999 of testjob'}),
-            mock.call().execute(num_retries=0),
-            mock.call(ensure_unique_name=True,
-                      body={'portable_data_hash': '99999999999999999999999999999994+99',
-                            'manifest_text': 'ABC',
-                            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                            'name': 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
-            mock.call().execute(num_retries=0),
-        ])
-
-        arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
-
-    @mock.patch("arvados.collection.CollectionReader")
-    def test_done_use_existing_collection(self, reader):
-        api = mock.MagicMock()
-
-        runner = mock.MagicMock()
-        runner.api = api
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.num_retries = 0
-
-        reader().keys.return_value = "log.txt"
-        reader().open.return_value = io.StringIO(
-            str(u"""2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
-2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.outdir)=/tmp/crunch-job-task-work/compute3.1/outdir
-2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
-        """))
-
-        api.collections().list().execute.side_effect = (
-            {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},
-            {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},
-        )
-
-        arvjob = arvados_cwl.ArvadosJob(runner,
-                                        mock.MagicMock(),
-                                        {},
-                                        None,
-                                        [],
-                                        [],
-                                        "testjob")
-        arvjob.output_callback = mock.MagicMock()
-        arvjob.collect_outputs = mock.MagicMock()
-        arvjob.collect_outputs.return_value = {"out": "stuff"}
-
-        arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        })
-
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            # Output collection
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                               ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                               ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0),
-            # Log collection
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                               ['portable_data_hash', '=', '99999999999999999999999999999994+99'],
-                               ['name', '=', 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz']]),
-            mock.call().execute(num_retries=0)
-        ])
-
-        self.assertFalse(api.collections().create.called)
-
-        arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
-
-
-class TestWorkflow(unittest.TestCase):
-
-    def setUp(self):
-        cwltool.process._names = set()
-
-    def helper(self, runner, enable_reuse=True):
-        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
-
-        make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
-                                         collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
-
-        document_loader.fetcher_constructor = functools.partial(arvados_cwl.CollectionFetcher, api_client=runner.api, fs_access=make_fs_access(""))
-        document_loader.fetcher = document_loader.fetcher_constructor(document_loader.cache, document_loader.session)
-        document_loader.fetch_text = document_loader.fetcher.fetch_text
-        document_loader.check_exists = document_loader.fetcher.check_exists
-
-        loadingContext = arvados_cwl.context.ArvLoadingContext(
-            {"avsc_names": avsc_names,
-             "basedir": "",
-             "make_fs_access": make_fs_access,
-             "loader": document_loader,
-             "metadata": {"cwlVersion": "v1.1", "http://commonwl.org/cwltool#original_cwlVersion": "v1.0"},
-             "construct_tool_object": runner.arv_make_tool})
-        runtimeContext = arvados_cwl.context.ArvRuntimeContext(
-            {"work_api": "jobs",
-             "basedir": "",
-             "name": "test_run_wf_"+str(enable_reuse),
-             "make_fs_access": make_fs_access,
-             "enable_reuse": enable_reuse,
-             "priority": 500})
-
-        return loadingContext, runtimeContext
-
-    # The test passes no builder.resources
-    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
-    @mock.patch("arvados.collection.CollectionReader")
-    @mock.patch("arvados.collection.Collection")
-    @mock.patch('arvados.commands.keepdocker.list_images_in_arv')
-    def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader):
-        arv_docker_clear_cache()
-        arvados_cwl.add_arv_hints()
-
-        api = mock.MagicMock()
-        api._rootDesc = get_rootDesc()
-
-        runner = arvados_cwl.executor.ArvCwlExecutor(api, argparse.Namespace(work_api="jobs",
-                                                                             output_name=None,
-                                                                             output_tags=None,
-                                                                             thread_count=1,
-                                                                             collection_cache_size=None))
-        self.assertEqual(runner.work_api, 'jobs')
-
-        list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
-        runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
-        runner.api.collections().list().execute.return_value = {"items": [{
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz",
-            "portable_data_hash": "99999999999999999999999999999993+99"}]}
-
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.ignore_docker_for_reuse = False
-        runner.num_retries = 0
-
-        loadingContext, runtimeContext = self.helper(runner)
-        runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
-        tool, metadata = loadingContext.loader.resolve_ref("tests/wf/scatter2.cwl")
-        metadata["cwlVersion"] = tool["cwlVersion"]
-
-        mockc = mock.MagicMock()
-        mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mockc, *args, **kwargs)
-        mockcollectionreader().find.return_value = arvados.arvfile.ArvadosFile(mock.MagicMock(), "token.txt")
-
-        arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
-        arvtool.formatgraph = None
-        it = arvtool.job({}, mock.MagicMock(), runtimeContext)
-
-        next(it).run(runtimeContext)
-        next(it).run(runtimeContext)
-
-        with open("tests/wf/scatter2_subwf.cwl") as f:
-            subwf = StripYAMLComments(f.read().rstrip())
-
-        runner.api.jobs().create.assert_called_with(
-            body=JsonDiffMatcher({
-                'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
-                'repository': 'arvados',
-                'script_version': 'master',
-                'script': 'crunchrunner',
-                'script_parameters': {
-                    'tasks': [{'task.env': {
-                        'HOME': '$(task.outdir)',
-                        'TMPDIR': '$(task.tmpdir)'},
-                               'task.vwd': {
-                                   'workflow.cwl': '$(task.keep)/99999999999999999999999999999996+99/workflow.cwl',
-                                   'cwl.input.yml': '$(task.keep)/99999999999999999999999999999996+99/cwl.input.yml'
-                               },
-                    'command': [u'cwltool', u'--no-container', u'--move-outputs', u'--preserve-entire-environment', u'workflow.cwl#main', u'cwl.input.yml'],
-                    'task.stdout': 'cwl.output.json'}]},
-                'runtime_constraints': {
-                    'min_scratch_mb_per_node': 2048,
-                    'min_cores_per_node': 1,
-                    'docker_image': 'arvados/jobs',
-                    'min_ram_mb_per_node': 1024
-                },
-                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
-            filters=[['repository', '=', 'arvados'],
-                     ['script', '=', 'crunchrunner'],
-                     ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
-                     ['docker_image_locator', 'in docker', 'arvados/jobs']],
-            find_or_create=True)
-
-        mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
-        mockc.open().__enter__().write.assert_has_calls([mock.call(
-bytes(b'''{
-  "fileblub": {
-    "basename": "token.txt",
-    "class": "File",
-    "location": "/keep/99999999999999999999999999999999+118/token.txt",
-    "size": 0
-  },
-  "sleeptime": 5
-}'''))])
-
-    # The test passes no builder.resources
-    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
-    @mock.patch("arvados.collection.CollectionReader")
-    @mock.patch("arvados.collection.Collection")
-    @mock.patch('arvados.commands.keepdocker.list_images_in_arv')
-    def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
-        arv_docker_clear_cache()
-        arvados_cwl.add_arv_hints()
-
-        api = mock.MagicMock()
-        api._rootDesc = get_rootDesc()
-
-        runner = arvados_cwl.executor.ArvCwlExecutor(api, argparse.Namespace(work_api="jobs",
-                                                                             output_name=None,
-                                                                             output_tags=None,
-                                                                             thread_count=1,
-                                                                             collection_cache_size=None))
-        self.assertEqual(runner.work_api, 'jobs')
-
-        list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
-        runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
-        runner.api.collections().list().execute.return_value = {"items": [{
-            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz",
-            "portable_data_hash": "99999999999999999999999999999993+99"}]}
-
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.ignore_docker_for_reuse = False
-        runner.num_retries = 0
-
-        loadingContext, runtimeContext = self.helper(runner)
-        loadingContext.do_update = True
-        runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
-        tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl")
-
-        mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mock.MagicMock(), *args, **kwargs)
-
-        arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
-        arvtool.formatgraph = None
-        it = arvtool.job({}, mock.MagicMock(), runtimeContext)
-
-        next(it).run(runtimeContext)
-        next(it).run(runtimeContext)
-
-        with open("tests/wf/echo-subwf.cwl") as f:
-            subwf = StripYAMLComments(f.read())
-
-        runner.api.jobs().create.assert_called_with(
-            body=JsonDiffMatcher({
-                'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
-                'repository': 'arvados',
-                'script_version': 'master',
-                'script': 'crunchrunner',
-                'script_parameters': {
-                    'tasks': [{'task.env': {
-                        'HOME': '$(task.outdir)',
-                        'TMPDIR': '$(task.tmpdir)'},
-                               'task.vwd': {
-                                   'workflow.cwl': '$(task.keep)/99999999999999999999999999999996+99/workflow.cwl',
-                                   'cwl.input.yml': '$(task.keep)/99999999999999999999999999999996+99/cwl.input.yml'
-                               },
-                    'command': [u'cwltool', u'--no-container', u'--move-outputs', u'--preserve-entire-environment', u'workflow.cwl#main', u'cwl.input.yml'],
-                    'task.stdout': 'cwl.output.json'}]},
-                'runtime_constraints': {
-                    'min_scratch_mb_per_node': 4096,
-                    'min_cores_per_node': 3,
-                    'docker_image': 'arvados/jobs',
-                    'min_ram_mb_per_node': 1024
-                },
-                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
-            filters=[['repository', '=', 'arvados'],
-                     ['script', '=', 'crunchrunner'],
-                     ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
-                     ['docker_image_locator', 'in docker', 'arvados/jobs']],
-            find_or_create=True)
-
-    def test_default_work_api(self):
-        arvados_cwl.add_arv_hints()
-
-        api = mock.MagicMock()
-        api._rootDesc = copy.deepcopy(get_rootDesc())
-        del api._rootDesc.get('resources')['jobs']['methods']['create']
-        runner = arvados_cwl.executor.ArvCwlExecutor(api)
-        self.assertEqual(runner.work_api, 'containers')
index 1dbd968eaa730f738ab87c3eef8ac66b486b8bfc..d215cba7fc0041fc6ec9540bda956e856c393c2a 100644 (file)
@@ -340,73 +340,6 @@ class TestSubmit(unittest.TestCase):
     def setUp(self):
         cwltool.process._names = set()
 
-    @mock.patch("arvados_cwl.arvdocker.arv_docker_get_image")
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit(self, stubs, tm, arvdock):
-        def get_image(api_client, dockerRequirement, pull_image, project_uuid):
-            if dockerRequirement["dockerPull"] == 'arvados/jobs:'+arvados_cwl.__version__:
-                return '999999999999999999999999999999d3+99'
-            elif dockerRequirement["dockerPull"] == "debian:8":
-                return '999999999999999999999999999999d4+99'
-        arvdock.side_effect = get_image
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=jobs", "--debug",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        stubs.api.collections().create.assert_has_calls([
-            mock.call(body=JsonDiffMatcher({
-                'manifest_text':
-                '. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
-                'replication_desired': None,
-                'name': 'submit_wf.cwl input (169f39d466a5438ac4a90e779bf750c7+53)',
-            }), ensure_unique_name=False),
-            mock.call(body=JsonDiffMatcher({
-                'manifest_text':
-                '. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
-                'replication_desired': None,
-                'name': 'submit_tool.cwl dependencies (5d373e7629203ce39e7c22af98a0f881+52)',
-            }), ensure_unique_name=False),
-            mock.call(body=JsonDiffMatcher({
-                'manifest_text':
-                ". 68089141fbf7e020ac90a9d6a575bc8f+1312 0:1312:workflow.cwl\n",
-                'replication_desired': None,
-                'name': 'submit_wf.cwl',
-            }), ensure_unique_name=True)        ])
-
-        arvdock.assert_has_calls([
-            mock.call(stubs.api, {"class": "DockerRequirement", "dockerPull": "debian:8"}, True, None),
-            mock.call(stubs.api, {"class": "DockerRequirement", "dockerPull": "debian:8", 'http://arvados.org/cwl#dockerCollectionPDH': '999999999999999999999999999999d4+99'}, True, None),
-            mock.call(stubs.api, {'dockerPull': 'arvados/jobs:'+arvados_cwl.__version__}, True, None)
-        ])
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_no_reuse(self, stubs, tm):
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=jobs", "--debug", "--disable-reuse",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:enable_reuse"] = {"value": False}
-        expect_pipeline["properties"] = {"run_options": {"enable_job_reuse": False}}
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
     @stubs
     def test_error_when_multiple_storage_classes_specified(self, stubs):
         storage_classes = "foo,bar"
@@ -416,41 +349,6 @@ class TestSubmit(unittest.TestCase):
                 sys.stdin, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 1)
 
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_on_error(self, stubs, tm):
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=jobs", "--debug", "--on-error=stop",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:on_error"] = "stop"
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_runner_ram(self, stubs, tm):
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--debug", "--submit-runner-ram=2048",
-             "--api=jobs",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["runtime_constraints"]["min_ram_mb_per_node"] = 2048
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
     @mock.patch("time.sleep")
     @stubs
     def test_submit_invalid_runner_ram(self, stubs, tm):
@@ -460,81 +358,6 @@ class TestSubmit(unittest.TestCase):
             stubs.capture_stdout, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 1)
 
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_output_name(self, stubs, tm):
-        output_name = "test_output_name"
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--debug", "--output-name", output_name,
-             "--api=jobs",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:output_name"] = output_name
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_pipeline_name(self, stubs, tm):
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--debug", "--name=hello job 123",
-             "--api=jobs",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["name"] = "hello job 123"
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_output_tags(self, stubs, tm):
-        output_tags = "tag0,tag1,tag2"
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--debug", "--output-tags", output_tags,
-             "--api=jobs",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:output_tags"] = output_tags
-
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_with_project_uuid(self, stubs, tm):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--debug",
-             "--project-uuid", project_uuid,
-             "--api=jobs",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            sys.stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["owner_uuid"] = project_uuid
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
 
     @stubs
     def test_submit_container(self, stubs):
@@ -878,28 +701,6 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
         self.assertEqual(exited, 0)
 
-    @mock.patch("arvados.collection.CollectionReader")
-    @mock.patch("time.sleep")
-    @stubs
-    def test_submit_jobs_keepref(self, stubs, tm, reader):
-        with open("tests/wf/expect_arvworkflow.cwl") as f:
-            reader().open().__enter__().read.return_value = f.read()
-
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=jobs", "--debug",
-             "keep:99999999999999999999999999999994+99/expect_arvworkflow.cwl#main", "-x", "XxX"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["x"] = "XxX"
-        del expect_pipeline["components"]["cwl-runner"]["script_parameters"]["y"]
-        del expect_pipeline["components"]["cwl-runner"]["script_parameters"]["z"]
-        expect_pipeline["components"]["cwl-runner"]["script_parameters"]["cwl:tool"] = "99999999999999999999999999999994+99/expect_arvworkflow.cwl#main"
-        expect_pipeline["name"] = "expect_arvworkflow.cwl#main"
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(exited, 0)
-
     @mock.patch("time.sleep")
     @stubs
     def test_submit_arvworkflow(self, stubs, tm):
@@ -1115,22 +916,6 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
         self.assertEqual(exited, 0)
 
-    @stubs
-    def test_submit_job_runner_image(self, stubs):
-        exited = arvados_cwl.main(
-            ["--submit", "--no-wait", "--api=jobs", "--debug", "--submit-runner-image=arvados/jobs:123",
-                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-
-        stubs.expect_pipeline_instance["components"]["cwl-runner"]["runtime_constraints"]["docker_image"] = "999999999999999999999999999999d5+99"
-
-        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
-        stubs.api.pipeline_instances().create.assert_called_with(
-            body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_uuid + '\n')
-        self.assertEqual(exited, 0)
-
     @stubs
     def test_submit_container_runner_image(self, stubs):
         exited = arvados_cwl.main(
@@ -1543,123 +1328,6 @@ class TestSubmit(unittest.TestCase):
             cwltool_logger.removeHandler(stderr_logger)
 
 
-class TestCreateTemplate(unittest.TestCase):
-    existing_template_uuid = "zzzzz-p5p6p-validworkfloyml"
-
-    def _adjust_script_params(self, expect_component):
-        expect_component['script_parameters']['x'] = {
-            'dataclass': 'File',
-            'required': True,
-            'type': 'File',
-            'value': '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt',
-        }
-        expect_component['script_parameters']['y'] = {
-            'dataclass': 'Collection',
-            'required': True,
-            'type': 'Directory',
-            'value': '99999999999999999999999999999998+99',
-        }
-        expect_component['script_parameters']['z'] = {
-            'dataclass': 'Collection',
-            'required': True,
-            'type': 'Directory',
-        }
-
-    @stubs
-    def test_create(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-
-        exited = arvados_cwl.main(
-            ["--create-workflow", "--debug",
-             "--api=jobs",
-             "--project-uuid", project_uuid,
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        stubs.api.pipeline_instances().create.refute_called()
-        stubs.api.jobs().create.refute_called()
-
-        expect_component = copy.deepcopy(stubs.expect_job_spec)
-        self._adjust_script_params(expect_component)
-        expect_template = {
-            "components": {
-                "submit_wf.cwl": expect_component,
-            },
-            "name": "submit_wf.cwl",
-            "owner_uuid": project_uuid,
-        }
-        stubs.api.pipeline_templates().create.assert_called_with(
-            body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
-
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_template_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-    @stubs
-    def test_create_name(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-
-        exited = arvados_cwl.main(
-            ["--create-workflow", "--debug",
-             "--project-uuid", project_uuid,
-             "--api=jobs",
-             "--name", "testing 123",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        stubs.api.pipeline_instances().create.refute_called()
-        stubs.api.jobs().create.refute_called()
-
-        expect_component = copy.deepcopy(stubs.expect_job_spec)
-        self._adjust_script_params(expect_component)
-        expect_template = {
-            "components": {
-                "testing 123": expect_component,
-            },
-            "name": "testing 123",
-            "owner_uuid": project_uuid,
-        }
-        stubs.api.pipeline_templates().create.assert_called_with(
-            body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
-
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         stubs.expect_pipeline_template_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-    @stubs
-    def test_update_name(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-
-        exited = arvados_cwl.main(
-            ["--update-workflow", self.existing_template_uuid,
-             "--debug",
-             "--project-uuid", project_uuid,
-             "--api=jobs",
-             "--name", "testing 123",
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        stubs.api.pipeline_instances().create.refute_called()
-        stubs.api.jobs().create.refute_called()
-
-        expect_component = copy.deepcopy(stubs.expect_job_spec)
-        self._adjust_script_params(expect_component)
-        expect_template = {
-            "components": {
-                "testing 123": expect_component,
-            },
-            "name": "testing 123",
-            "owner_uuid": project_uuid,
-        }
-        stubs.api.pipeline_templates().create.refute_called()
-        stubs.api.pipeline_templates().update.assert_called_with(
-            body=JsonDiffMatcher(expect_template), uuid=self.existing_template_uuid)
-
-        self.assertEqual(stubs.capture_stdout.getvalue(),
-                         self.existing_template_uuid + '\n')
-        self.assertEqual(exited, 0)
-
-
 class TestCreateWorkflow(unittest.TestCase):
     existing_workflow_uuid = "zzzzz-7fd4e-validworkfloyml"
     expect_workflow = StripYAMLComments(
@@ -1724,26 +1392,6 @@ class TestCreateWorkflow(unittest.TestCase):
                          stubs.expect_workflow_uuid + '\n')
         self.assertEqual(exited, 0)
 
-    @stubs
-    def test_incompatible_api(self, stubs):
-        capture_stderr = StringIO()
-        acr_logger = logging.getLogger('arvados.cwl-runner')
-        stderr_logger = logging.StreamHandler(capture_stderr)
-        acr_logger.addHandler(stderr_logger)
-
-        try:
-            exited = arvados_cwl.main(
-                ["--update-workflow", self.existing_workflow_uuid,
-                 "--api=jobs",
-                 "--debug",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                sys.stderr, sys.stderr, api_client=stubs.api)
-            self.assertEqual(exited, 1)
-            self.assertRegexpMatches(
-                capture_stderr.getvalue(),
-                "--update-workflow arg '{}' uses 'containers' API, but --api='jobs' specified".format(self.existing_workflow_uuid))
-        finally:
-            acr_logger.removeHandler(stderr_logger)
 
     @stubs
     def test_update(self, stubs):
@@ -1817,82 +1465,3 @@ class TestCreateWorkflow(unittest.TestCase):
         self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
         self.assertEqual(exited, 0)
-
-class TestTemplateInputs(unittest.TestCase):
-    expect_template = {
-        "components": {
-            "inputs_test.cwl": {
-                'runtime_constraints': {
-                    'docker_image': '999999999999999999999999999999d3+99',
-                    'min_ram_mb_per_node': 1024
-                },
-                'script_parameters': {
-                    'cwl:tool':
-                    'a2de777156fb700f1363b1f2e370adca+60/workflow.cwl#main',
-                    'optionalFloatInput': None,
-                    'fileInput': {
-                        'type': 'File',
-                        'dataclass': 'File',
-                        'required': True,
-                        'title': "It's a file; we expect to find some characters in it.",
-                        'description': 'If there were anything further to say, it would be said here,\nor here.'
-                    },
-                    'floatInput': {
-                        'type': 'float',
-                        'dataclass': 'number',
-                        'required': True,
-                        'title': 'Floats like a duck',
-                        'default': 0.1,
-                        'value': 0.1,
-                    },
-                    'optionalFloatInput': {
-                        'type': ['null', 'float'],
-                        'dataclass': 'number',
-                        'required': False,
-                    },
-                    'boolInput': {
-                        'type': 'boolean',
-                        'dataclass': 'boolean',
-                        'required': True,
-                        'title': 'True or false?',
-                    },
-                },
-                'repository': 'arvados',
-                'script_version': 'master',
-                'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
-                'script': 'cwl-runner',
-            },
-        },
-        "name": "inputs_test.cwl",
-    }
-
-    @stubs
-    def test_inputs_empty(self, stubs):
-        exited = arvados_cwl.main(
-            ["--debug", "--api=jobs", "--create-template",
-             "tests/wf/inputs_test.cwl", "tests/order/empty_order.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        stubs.api.pipeline_templates().create.assert_called_with(
-            body=JsonDiffMatcher(self.expect_template), ensure_unique_name=True)
-
-        self.assertEqual(exited, 0)
-
-    @stubs
-    def test_inputs(self, stubs):
-        exited = arvados_cwl.main(
-            ["--api=jobs", "--create-template",
-             "tests/wf/inputs_test.cwl", "tests/order/inputs_test_order.json"],
-            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
-
-        expect_template = copy.deepcopy(self.expect_template)
-        params = expect_template[
-            "components"]["inputs_test.cwl"]["script_parameters"]
-        params["fileInput"]["value"] = '169f39d466a5438ac4a90e779bf750c7+53/blorp.txt'
-        params["cwl:tool"] = 'a2de777156fb700f1363b1f2e370adca+60/workflow.cwl#main'
-        params["floatInput"]["value"] = 1.234
-        params["boolInput"]["value"] = True
-
-        stubs.api.pipeline_templates().create.assert_called_with(
-            body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
-        self.assertEqual(exited, 0)
index 9d23c6a87e69d1502d073b59424a92574519cc7a..e8bbc08d8a27ae12deee011d7e93d9dca24cc5fe 100644 (file)
@@ -34,7 +34,7 @@ steps:
     hints:
       - class: arv:RunInSingleContainer
       - class: ResourceRequirement
-        ramMin: $(inputs.count*128)
+        ramMin: $(inputs.count*32)
       - class: arv:APIRequirement
     scatter: count
     run:
@@ -58,4 +58,4 @@ steps:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 128)]
+            arguments: [python, $(inputs.script), $(inputs.count * 32)]
index 2701fd1c85991cc018b69a568fb8161dc520321f..15cc8df6a229e351f0be880dbd7be69c8b78c152 100644 (file)
@@ -44,7 +44,7 @@ steps:
       outputs: []
       hints:
         - class: ResourceRequirement
-          ramMin: $(inputs.count*128)
+          ramMin: $(inputs.count*32)
       steps:
         sleep1:
           in:
@@ -59,4 +59,4 @@ steps:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 128)]
+            arguments: [python, $(inputs.script), $(inputs.count * 32)]
index 3accb324fb339fa3a1f8993f32719fc3526203a8..e07ff8deff08514669df5a5010d0027e5448a5ce 100644 (file)
@@ -50,10 +50,10 @@ steps:
             id: subtool
             hints:
               - class: ResourceRequirement
-                ramMin: $(inputs.count*128)
+                ramMin: $(inputs.count*32)
             inputs:
               count:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), $(inputs.count * 128)]
+            arguments: [python, $(inputs.script), $(inputs.count * 32)]
index 9a26d01132cf07d6ae1cb11aff97a627807ddca5..05e73c8df5c6f5cbbf709bf6b9831c1420978622 100644 (file)
@@ -53,10 +53,10 @@ steps:
             id: subtool
             hints:
               - class: ResourceRequirement
-                ramMin: 128
+                ramMin: 32
             inputs:
               count:
                 type: int
               script: File
             outputs: []
-            arguments: [python, $(inputs.script), "128"]
+            arguments: [python, $(inputs.script), "32"]
index bc434a20304bbbd129ed64d76307469c46a43fbc..93f52f3c809bb003c3bb506574d42e140728960c 100644 (file)
@@ -57,6 +57,15 @@ func (sc *Config) GetCluster(clusterID string) (*Cluster, error) {
        }
 }
 
+type WebDAVCacheConfig struct {
+       TTL                  Duration
+       UUIDTTL              Duration
+       MaxBlockEntries      int
+       MaxCollectionEntries int
+       MaxCollectionBytes   int64
+       MaxPermissionEntries int
+       MaxUUIDEntries       int
+}
 type Cluster struct {
        ClusterID       string `json:"-"`
        ManagementToken string
@@ -101,6 +110,8 @@ type Cluster struct {
                PreserveVersionIfIdle Duration
                TrashSweepInterval    Duration
                TrustAllContent       bool
+
+               WebDAVCache WebDAVCacheConfig
        }
        Git struct {
                Repositories string
@@ -272,13 +283,8 @@ type ContainersConfig struct {
        UsePreemptibleInstances     bool
 
        JobsAPI struct {
-               Enable                  string
-               GitInternalDir          string
-               DefaultDockerImage      string
-               CrunchJobWrapper        string
-               CrunchJobUser           string
-               CrunchRefreshTrigger    string
-               ReuseJobIfOutputsDiffer bool
+               Enable         string
+               GitInternalDir string
        }
        Logging struct {
                MaxAge                       Duration
index 372f09d14bb14f929a40020523c75da69617cdbc..ecd9e2c6e0263d1bf1c7f320f47f3e073f7f2220 100644 (file)
@@ -10,7 +10,6 @@ import (
        "net/http"
        "os"
        "testing"
-       "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        . "gopkg.in/check.v1"
@@ -114,55 +113,6 @@ func (s *ServerRequiredSuite) TestInvalidResourceType(c *C) {
        c.Assert(len(getback), Equals, 0)
 }
 
-func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
-       arv, err := MakeArvadosClient()
-
-       for _, idleConnections := range []bool{
-               false,
-               true,
-       } {
-               if idleConnections {
-                       arv.lastClosedIdlesAt = time.Now().Add(-time.Minute)
-               } else {
-                       arv.lastClosedIdlesAt = time.Now()
-               }
-
-               getback := make(Dict)
-               err = arv.Create("pipeline_templates",
-                       Dict{"pipeline_template": Dict{
-                               "name": "tmp",
-                               "components": Dict{
-                                       "c1": map[string]string{"script": "script1"},
-                                       "c2": map[string]string{"script": "script2"}}}},
-                       &getback)
-               c.Assert(err, Equals, nil)
-               c.Assert(getback["name"], Equals, "tmp")
-               c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
-
-               uuid := getback["uuid"].(string)
-
-               getback = make(Dict)
-               err = arv.Get("pipeline_templates", uuid, nil, &getback)
-               c.Assert(err, Equals, nil)
-               c.Assert(getback["name"], Equals, "tmp")
-               c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
-
-               getback = make(Dict)
-               err = arv.Update("pipeline_templates", uuid,
-                       Dict{
-                               "pipeline_template": Dict{"name": "tmp2"}},
-                       &getback)
-               c.Assert(err, Equals, nil)
-               c.Assert(getback["name"], Equals, "tmp2")
-
-               c.Assert(getback["uuid"].(string), Equals, uuid)
-               getback = make(Dict)
-               err = arv.Delete("pipeline_templates", uuid, nil, &getback)
-               c.Assert(err, Equals, nil)
-               c.Assert(getback["name"], Equals, "tmp2")
-       }
-}
-
 func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
        arv, _ := MakeArvadosClient()
 
diff --git a/sdk/go/crunchrunner/crunchrunner.go b/sdk/go/crunchrunner/crunchrunner.go
deleted file mode 100644 (file)
index ca16fc6..0000000
+++ /dev/null
@@ -1,439 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package main
-
-import (
-       "encoding/json"
-       "fmt"
-       "io"
-       "io/ioutil"
-       "log"
-       "os"
-       "os/exec"
-       "os/signal"
-       "strings"
-       "syscall"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-)
-
-type TaskDef struct {
-       Command            []string          `json:"command"`
-       Env                map[string]string `json:"task.env"`
-       Stdin              string            `json:"task.stdin"`
-       Stdout             string            `json:"task.stdout"`
-       Stderr             string            `json:"task.stderr"`
-       Vwd                map[string]string `json:"task.vwd"`
-       SuccessCodes       []int             `json:"task.successCodes"`
-       PermanentFailCodes []int             `json:"task.permanentFailCodes"`
-       TemporaryFailCodes []int             `json:"task.temporaryFailCodes"`
-       KeepTmpOutput      bool              `json:"task.keepTmpOutput"`
-}
-
-type Tasks struct {
-       Tasks []TaskDef `json:"tasks"`
-}
-
-type Job struct {
-       ScriptParameters Tasks `json:"script_parameters"`
-}
-
-type Task struct {
-       JobUUID              string  `json:"job_uuid"`
-       CreatedByJobTaskUUID string  `json:"created_by_job_task_uuid"`
-       Parameters           TaskDef `json:"parameters"`
-       Sequence             int     `json:"sequence"`
-       Output               string  `json:"output"`
-       Success              bool    `json:"success"`
-       Progress             float32 `json:"sequence"`
-}
-
-type IArvadosClient interface {
-       Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error
-       Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error)
-}
-
-func setupDirectories(crunchtmpdir, taskUUID string, keepTmp bool) (tmpdir, outdir string, err error) {
-       tmpdir = crunchtmpdir + "/tmpdir"
-       err = os.Mkdir(tmpdir, 0700)
-       if err != nil {
-               return "", "", err
-       }
-
-       if keepTmp {
-               outdir = os.Getenv("TASK_KEEPMOUNT_TMP")
-       } else {
-               outdir = crunchtmpdir + "/outdir"
-               err = os.Mkdir(outdir, 0700)
-               if err != nil {
-                       return "", "", err
-               }
-       }
-
-       return tmpdir, outdir, nil
-}
-
-func checkOutputFilename(outdir, fn string) error {
-       if strings.HasPrefix(fn, "/") || strings.HasSuffix(fn, "/") {
-               return fmt.Errorf("Path must not start or end with '/'")
-       }
-       if strings.Index("../", fn) != -1 {
-               return fmt.Errorf("Path must not contain '../'")
-       }
-
-       sl := strings.LastIndex(fn, "/")
-       if sl != -1 {
-               os.MkdirAll(outdir+"/"+fn[0:sl], 0777)
-       }
-       return nil
-}
-
-func copyFile(dst, src string) error {
-       in, err := os.Open(src)
-       if err != nil {
-               return err
-       }
-       defer in.Close()
-
-       out, err := os.Create(dst)
-       if err != nil {
-               return err
-       }
-       defer out.Close()
-
-       _, err = io.Copy(out, in)
-       return err
-}
-
-func setupCommand(cmd *exec.Cmd, taskp TaskDef, outdir string, replacements map[string]string) (stdin, stdout, stderr string, err error) {
-       if taskp.Vwd != nil {
-               for k, v := range taskp.Vwd {
-                       v = substitute(v, replacements)
-                       err = checkOutputFilename(outdir, k)
-                       if err != nil {
-                               return "", "", "", err
-                       }
-                       if taskp.KeepTmpOutput {
-                               err = copyFile(v, outdir+"/"+k)
-                       } else {
-                               err = os.Symlink(v, outdir+"/"+k)
-                       }
-                       if err != nil {
-                               return "", "", "", err
-                       }
-               }
-       }
-
-       if taskp.Stdin != "" {
-               // Set up stdin redirection
-               stdin = substitute(taskp.Stdin, replacements)
-               cmd.Stdin, err = os.Open(stdin)
-               if err != nil {
-                       return "", "", "", err
-               }
-       }
-
-       if taskp.Stdout != "" {
-               err = checkOutputFilename(outdir, taskp.Stdout)
-               if err != nil {
-                       return "", "", "", err
-               }
-               // Set up stdout redirection
-               stdout = outdir + "/" + taskp.Stdout
-               cmd.Stdout, err = os.Create(stdout)
-               if err != nil {
-                       return "", "", "", err
-               }
-       } else {
-               cmd.Stdout = os.Stdout
-       }
-
-       if taskp.Stderr != "" {
-               err = checkOutputFilename(outdir, taskp.Stderr)
-               if err != nil {
-                       return "", "", "", err
-               }
-               // Set up stderr redirection
-               stderr = outdir + "/" + taskp.Stderr
-               cmd.Stderr, err = os.Create(stderr)
-               if err != nil {
-                       return "", "", "", err
-               }
-       } else {
-               cmd.Stderr = os.Stderr
-       }
-
-       if taskp.Env != nil {
-               // Set up subprocess environment
-               cmd.Env = os.Environ()
-               for k, v := range taskp.Env {
-                       v = substitute(v, replacements)
-                       cmd.Env = append(cmd.Env, k+"="+v)
-               }
-       }
-       return stdin, stdout, stderr, nil
-}
-
-// Set up signal handlers.  Go sends signal notifications to a "signal
-// channel".
-func setupSignals(cmd *exec.Cmd) chan os.Signal {
-       sigChan := make(chan os.Signal, 1)
-       signal.Notify(sigChan, syscall.SIGTERM)
-       signal.Notify(sigChan, syscall.SIGINT)
-       signal.Notify(sigChan, syscall.SIGQUIT)
-       return sigChan
-}
-
-func inCodes(code int, codes []int) bool {
-       if codes != nil {
-               for _, c := range codes {
-                       if code == c {
-                               return true
-                       }
-               }
-       }
-       return false
-}
-
-const TASK_TEMPFAIL = 111
-
-type TempFail struct{ error }
-type PermFail struct{}
-
-func (s PermFail) Error() string {
-       return "PermFail"
-}
-
-func substitute(inp string, subst map[string]string) string {
-       for k, v := range subst {
-               inp = strings.Replace(inp, k, v, -1)
-       }
-       return inp
-}
-
-func getKeepTmp(outdir string) (manifest string, err error) {
-       fn, err := os.Open(outdir + "/" + ".arvados#collection")
-       if err != nil {
-               return "", err
-       }
-       defer fn.Close()
-
-       buf, err := ioutil.ReadAll(fn)
-       if err != nil {
-               return "", err
-       }
-       collection := arvados.Collection{}
-       err = json.Unmarshal(buf, &collection)
-       return collection.ManifestText, err
-}
-
-func runner(api IArvadosClient,
-       kc IKeepClient,
-       jobUUID, taskUUID, crunchtmpdir, keepmount string,
-       jobStruct Job, taskStruct Task) error {
-
-       var err error
-       taskp := taskStruct.Parameters
-
-       // If this is task 0 and there are multiple tasks, dispatch subtasks
-       // and exit.
-       if taskStruct.Sequence == 0 {
-               if len(jobStruct.ScriptParameters.Tasks) == 1 {
-                       taskp = jobStruct.ScriptParameters.Tasks[0]
-               } else {
-                       for _, task := range jobStruct.ScriptParameters.Tasks {
-                               err := api.Create("job_tasks",
-                                       map[string]interface{}{
-                                               "job_task": Task{
-                                                       JobUUID:              jobUUID,
-                                                       CreatedByJobTaskUUID: taskUUID,
-                                                       Sequence:             1,
-                                                       Parameters:           task}},
-                                       nil)
-                               if err != nil {
-                                       return TempFail{err}
-                               }
-                       }
-                       err = api.Update("job_tasks", taskUUID,
-                               map[string]interface{}{
-                                       "job_task": map[string]interface{}{
-                                               "output":   "",
-                                               "success":  true,
-                                               "progress": 1.0}},
-                               nil)
-                       return nil
-               }
-       }
-
-       var tmpdir, outdir string
-       tmpdir, outdir, err = setupDirectories(crunchtmpdir, taskUUID, taskp.KeepTmpOutput)
-       if err != nil {
-               return TempFail{err}
-       }
-
-       replacements := map[string]string{
-               "$(task.tmpdir)": tmpdir,
-               "$(task.outdir)": outdir,
-               "$(task.keep)":   keepmount}
-
-       log.Printf("crunchrunner: $(task.tmpdir)=%v", tmpdir)
-       log.Printf("crunchrunner: $(task.outdir)=%v", outdir)
-       log.Printf("crunchrunner: $(task.keep)=%v", keepmount)
-
-       // Set up subprocess
-       for k, v := range taskp.Command {
-               taskp.Command[k] = substitute(v, replacements)
-       }
-
-       cmd := exec.Command(taskp.Command[0], taskp.Command[1:]...)
-
-       cmd.Dir = outdir
-
-       var stdin, stdout, stderr string
-       stdin, stdout, stderr, err = setupCommand(cmd, taskp, outdir, replacements)
-       if err != nil {
-               return err
-       }
-
-       // Run subprocess and wait for it to complete
-       if stdin != "" {
-               stdin = " < " + stdin
-       }
-       if stdout != "" {
-               stdout = " > " + stdout
-       }
-       if stderr != "" {
-               stderr = " 2> " + stderr
-       }
-       log.Printf("Running %v%v%v%v", cmd.Args, stdin, stdout, stderr)
-
-       var caughtSignal os.Signal
-       sigChan := setupSignals(cmd)
-
-       err = cmd.Start()
-       if err != nil {
-               signal.Stop(sigChan)
-               return TempFail{err}
-       }
-
-       finishedSignalNotify := make(chan struct{})
-       go func(sig <-chan os.Signal) {
-               for sig := range sig {
-                       caughtSignal = sig
-                       cmd.Process.Signal(caughtSignal)
-               }
-               close(finishedSignalNotify)
-       }(sigChan)
-
-       err = cmd.Wait()
-       signal.Stop(sigChan)
-
-       close(sigChan)
-       <-finishedSignalNotify
-
-       if caughtSignal != nil {
-               log.Printf("Caught signal %v", caughtSignal)
-               return PermFail{}
-       }
-
-       if err != nil {
-               // Run() returns ExitError on non-zero exit code, but we handle
-               // that down below.  So only return if it's not ExitError.
-               if _, ok := err.(*exec.ExitError); !ok {
-                       return TempFail{err}
-               }
-       }
-
-       var success bool
-
-       exitCode := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
-
-       log.Printf("Completed with exit code %v", exitCode)
-
-       if inCodes(exitCode, taskp.PermanentFailCodes) {
-               success = false
-       } else if inCodes(exitCode, taskp.TemporaryFailCodes) {
-               return TempFail{fmt.Errorf("Process tempfail with exit code %v", exitCode)}
-       } else if inCodes(exitCode, taskp.SuccessCodes) || cmd.ProcessState.Success() {
-               success = true
-       } else {
-               success = false
-       }
-
-       // Upload output directory
-       var manifest string
-       if taskp.KeepTmpOutput {
-               manifest, err = getKeepTmp(outdir)
-       } else {
-               manifest, err = WriteTree(kc, outdir)
-       }
-       if err != nil {
-               return TempFail{err}
-       }
-
-       // Set status
-       err = api.Update("job_tasks", taskUUID,
-               map[string]interface{}{
-                       "job_task": Task{
-                               Output:   manifest,
-                               Success:  success,
-                               Progress: 1}},
-               nil)
-       if err != nil {
-               return TempFail{err}
-       }
-
-       if success {
-               return nil
-       } else {
-               return PermFail{}
-       }
-}
-
-func main() {
-       api, err := arvadosclient.MakeArvadosClient()
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       jobUUID := os.Getenv("JOB_UUID")
-       taskUUID := os.Getenv("TASK_UUID")
-       tmpdir := os.Getenv("TASK_WORK")
-       keepmount := os.Getenv("TASK_KEEPMOUNT")
-
-       var jobStruct Job
-       var taskStruct Task
-
-       err = api.Get("jobs", jobUUID, nil, &jobStruct)
-       if err != nil {
-               log.Fatal(err)
-       }
-       err = api.Get("job_tasks", taskUUID, nil, &taskStruct)
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       var kc IKeepClient
-       kc, err = keepclient.MakeKeepClient(api)
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       syscall.Umask(0022)
-       err = runner(api, kc, jobUUID, taskUUID, tmpdir, keepmount, jobStruct, taskStruct)
-
-       if err == nil {
-               os.Exit(0)
-       } else if _, ok := err.(TempFail); ok {
-               log.Print(err)
-               os.Exit(TASK_TEMPFAIL)
-       } else if _, ok := err.(PermFail); ok {
-               os.Exit(1)
-       } else {
-               log.Fatal(err)
-       }
-}
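
For reference, the runner deleted above did plain string substitution of $(task.tmpdir), $(task.outdir) and $(task.keep) in command arguments and redirection targets, and classified exit codes against the task's permanent-fail, temporary-fail and success code lists (temporary failures exited with TASK_TEMPFAIL, 111). A self-contained sketch of that behavior; the paths are illustrative only:

package main

import (
	"fmt"
	"strings"
)

// substitute replaces each placeholder key with its value, exactly as the
// removed crunchrunner did (simple string replacement, no escaping).
func substitute(inp string, subst map[string]string) string {
	for k, v := range subst {
		inp = strings.Replace(inp, k, v, -1)
	}
	return inp
}

// classify mirrors the removed exit-code handling: permanent-fail codes are
// checked first, then temporary-fail codes, then success codes or exit 0.
func classify(code int, success, tempfail, permfail []int) string {
	in := func(codes []int) bool {
		for _, c := range codes {
			if c == code {
				return true
			}
		}
		return false
	}
	switch {
	case in(permfail):
		return "permanent failure"
	case in(tempfail):
		return "temporary failure (would exit 111)"
	case in(success) || code == 0:
		return "success"
	default:
		return "permanent failure"
	}
}

func main() {
	replacements := map[string]string{
		"$(task.tmpdir)": "/tmp/crunch/tmpdir", // illustrative paths
		"$(task.outdir)": "/tmp/crunch/outdir",
		"$(task.keep)":   "/keep",
	}
	fmt.Println(substitute("cat $(task.keep)/file1.txt", replacements))
	fmt.Println(classify(1, nil, []int{1}, nil))
}
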
diff --git a/sdk/go/crunchrunner/crunchrunner_test.go b/sdk/go/crunchrunner/crunchrunner_test.go
deleted file mode 100644 (file)
index f2827c6..0000000
+++ /dev/null
@@ -1,478 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package main
-
-import (
-       "io"
-       "io/ioutil"
-       "log"
-       "os"
-       "syscall"
-       "testing"
-       "time"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       . "gopkg.in/check.v1"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
-       TestingT(t)
-}
-
-type TestSuite struct{}
-
-// Gocheck boilerplate
-var _ = Suite(&TestSuite{})
-
-type ArvTestClient struct {
-       c        *C
-       manifest string
-       success  bool
-}
-
-func (t ArvTestClient) Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error {
-       return nil
-}
-
-func (t ArvTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
-       t.c.Check(resourceType, Equals, "job_tasks")
-       t.c.Check(parameters, DeepEquals, arvadosclient.Dict{"job_task": Task{
-               Output:   t.manifest,
-               Success:  t.success,
-               Progress: 1}})
-       return nil
-}
-
-func (s *TestSuite) TestSimpleRun(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, "", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"echo", "foo"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-}
-
-func checkOutput(c *C, tmpdir string) {
-       file, err := os.Open(tmpdir + "/outdir/output.txt")
-       c.Assert(err, IsNil)
-
-       data := make([]byte, 100)
-       var count int
-       err = nil
-       offset := 0
-       for err == nil {
-               count, err = file.Read(data[offset:])
-               offset += count
-       }
-       c.Assert(err, Equals, io.EOF)
-       c.Check(string(data[0:offset]), Equals, "foo\n")
-}
-
-func (s *TestSuite) TestSimpleRunSubtask(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c,
-               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{
-                       {Command: []string{"echo", "bar"}},
-                       {Command: []string{"echo", "foo"}}}}},
-               Task{Parameters: TaskDef{
-                       Command: []string{"echo", "foo"},
-                       Stdout:  "output.txt"},
-                       Sequence: 1})
-       c.Check(err, IsNil)
-
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestRedirect(c *C) {
-       tmpfile, _ := ioutil.TempFile("", "")
-       tmpfile.Write([]byte("foo\n"))
-       tmpfile.Close()
-       defer os.Remove(tmpfile.Name())
-
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c,
-               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"cat"},
-                       Stdout:  "output.txt",
-                       Stdin:   tmpfile.Name()}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestEnv(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"/bin/sh", "-c", "echo $BAR"},
-                       Stdout:  "output.txt",
-                       Env:     map[string]string{"BAR": "foo"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestEnvSubstitute(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "foo\n",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"/bin/sh", "-c", "echo $BAR"},
-                       Stdout:  "output.txt",
-                       Env:     map[string]string{"BAR": "$(task.keep)"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestEnvReplace(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"/bin/sh", "-c", "echo $PATH"},
-                       Stdout:  "output.txt",
-                       Env:     map[string]string{"PATH": "foo"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-       checkOutput(c, tmpdir)
-}
-
-type SubtaskTestClient struct {
-       c     *C
-       parms []Task
-       i     int
-}
-
-func (t *SubtaskTestClient) Create(resourceType string, parameters arvadosclient.Dict, output interface{}) error {
-       t.c.Check(resourceType, Equals, "job_tasks")
-       t.c.Check(parameters, DeepEquals, arvadosclient.Dict{"job_task": t.parms[t.i]})
-       t.i += 1
-       return nil
-}
-
-func (t SubtaskTestClient) Update(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) (err error) {
-       return nil
-}
-
-func (s *TestSuite) TestScheduleSubtask(c *C) {
-
-       api := SubtaskTestClient{c, []Task{
-               {JobUUID: "zzzz-8i9sb-111111111111111",
-                       CreatedByJobTaskUUID: "zzzz-ot0gb-111111111111111",
-                       Sequence:             1,
-                       Parameters: TaskDef{
-                               Command: []string{"echo", "bar"}}},
-               {JobUUID: "zzzz-8i9sb-111111111111111",
-                       CreatedByJobTaskUUID: "zzzz-ot0gb-111111111111111",
-                       Sequence:             1,
-                       Parameters: TaskDef{
-                               Command: []string{"echo", "foo"}}}},
-               0}
-
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(&api, KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{
-                       {Command: []string{"echo", "bar"}},
-                       {Command: []string{"echo", "foo"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-
-}
-
-func (s *TestSuite) TestRunFail(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"/bin/sh", "-c", "exit 1"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, FitsTypeOf, PermFail{})
-}
-
-func (s *TestSuite) TestRunSuccessCode(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, "", true}, KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command:      []string{"/bin/sh", "-c", "exit 1"},
-                       SuccessCodes: []int{0, 1}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-}
-
-func (s *TestSuite) TestRunFailCode(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command:            []string{"/bin/sh", "-c", "exit 0"},
-                       PermanentFailCodes: []int{0, 1}}}}},
-               Task{Sequence: 0})
-       c.Check(err, FitsTypeOf, PermFail{})
-}
-
-func (s *TestSuite) TestRunTempFailCode(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, "", false}, KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command:            []string{"/bin/sh", "-c", "exit 1"},
-                       TemporaryFailCodes: []int{1}}}}},
-               Task{Sequence: 0})
-       c.Check(err, FitsTypeOf, TempFail{})
-}
-
-func (s *TestSuite) TestVwd(c *C) {
-       tmpfile, _ := ioutil.TempFile("", "")
-       tmpfile.Write([]byte("foo\n"))
-       tmpfile.Close()
-       defer os.Remove(tmpfile.Name())
-
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c, ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"ls", "output.txt"},
-                       Vwd: map[string]string{
-                               "output.txt": tmpfile.Name()}}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestSubstitutionStdin(c *C) {
-       keepmount, _ := ioutil.TempDir("", "")
-       ioutil.WriteFile(keepmount+"/"+"file1.txt", []byte("foo\n"), 0600)
-       defer func() {
-               os.RemoveAll(keepmount)
-       }()
-
-       log.Print("Keepmount is ", keepmount)
-
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       log.Print("tmpdir is ", tmpdir)
-
-       err := runner(ArvTestClient{c,
-               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               keepmount,
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"cat"},
-                       Stdout:  "output.txt",
-                       Stdin:   "$(task.keep)/file1.txt"}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestSubstitutionCommandLine(c *C) {
-       keepmount, _ := ioutil.TempDir("", "")
-       ioutil.WriteFile(keepmount+"/"+"file1.txt", []byte("foo\n"), 0600)
-       defer func() {
-               os.RemoveAll(keepmount)
-       }()
-
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c,
-               ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:output.txt\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               keepmount,
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"cat", "$(task.keep)/file1.txt"},
-                       Stdout:  "output.txt"}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-
-       checkOutput(c, tmpdir)
-}
-
-func (s *TestSuite) TestSignal(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       go func() {
-               time.Sleep(1 * time.Second)
-               self, _ := os.FindProcess(os.Getpid())
-               self.Signal(syscall.SIGINT)
-       }()
-
-       err := runner(ArvTestClient{c,
-               "", false},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"sleep", "4"}}}}},
-               Task{Sequence: 0})
-       c.Check(err, FitsTypeOf, PermFail{})
-
-}
-
-func (s *TestSuite) TestQuoting(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       err := runner(ArvTestClient{c,
-               "./s\\040ub:dir d3b07384d113edec49eaa6238ad5ff00+4 0:4::e\\040vil\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command: []string{"echo", "foo"},
-                       Stdout:  "s ub:dir/:e vi\nl"}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-}
-
-func (s *TestSuite) TestKeepTmp(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       os.Setenv("TASK_KEEPMOUNT_TMP", tmpdir)
-       defer os.Setenv("TASK_KEEPMOUNT_TMP", "")
-
-       fn, err := os.Create(tmpdir + "/.arvados#collection")
-       fn.Write([]byte("{\"manifest_text\":\". unparsed 0:3:foo\\n\",\"uuid\":null}"))
-       defer fn.Close()
-
-       err = runner(ArvTestClient{c,
-               ". unparsed 0:3:foo\n", true},
-               KeepTestClient{},
-               "zzzz-8i9sb-111111111111111",
-               "zzzz-ot0gb-111111111111111",
-               tmpdir,
-               "",
-               Job{ScriptParameters: Tasks{[]TaskDef{{
-                       Command:       []string{"echo", "foo"},
-                       KeepTmpOutput: true}}}},
-               Task{Sequence: 0})
-       c.Check(err, IsNil)
-
-}
diff --git a/sdk/go/crunchrunner/upload.go b/sdk/go/crunchrunner/upload.go
deleted file mode 100644 (file)
index 2848d10..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package main
-
-import (
-       "bytes"
-       "crypto/md5"
-       "errors"
-       "fmt"
-       "io"
-       "log"
-       "os"
-       "path/filepath"
-       "sort"
-       "strings"
-
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/sdk/go/manifest"
-)
-
-type Block struct {
-       data   []byte
-       offset int64
-}
-
-type ManifestStreamWriter struct {
-       *ManifestWriter
-       *manifest.ManifestStream
-       offset int64
-       *Block
-       uploader chan *Block
-       finish   chan []error
-}
-
-type IKeepClient interface {
-       PutHB(hash string, buf []byte) (string, int, error)
-}
-
-func (m *ManifestStreamWriter) Write(p []byte) (int, error) {
-       n, err := m.ReadFrom(bytes.NewReader(p))
-       return int(n), err
-}
-
-func (m *ManifestStreamWriter) ReadFrom(r io.Reader) (n int64, err error) {
-       var total int64
-       var count int
-
-       for err == nil {
-               if m.Block == nil {
-                       m.Block = &Block{make([]byte, keepclient.BLOCKSIZE), 0}
-               }
-               count, err = r.Read(m.Block.data[m.Block.offset:])
-               total += int64(count)
-               m.Block.offset += int64(count)
-               if m.Block.offset == keepclient.BLOCKSIZE {
-                       m.uploader <- m.Block
-                       m.Block = nil
-               }
-       }
-
-       if err == io.EOF {
-               return total, nil
-       } else {
-               return total, err
-       }
-
-}
-
-func (m *ManifestStreamWriter) goUpload() {
-       var errors []error
-       uploader := m.uploader
-       finish := m.finish
-       for block := range uploader {
-               hash := fmt.Sprintf("%x", md5.Sum(block.data[0:block.offset]))
-               signedHash, _, err := m.ManifestWriter.IKeepClient.PutHB(hash, block.data[0:block.offset])
-               if err != nil {
-                       errors = append(errors, err)
-               } else {
-                       m.ManifestStream.Blocks = append(m.ManifestStream.Blocks, signedHash)
-               }
-       }
-       finish <- errors
-}
-
-type ManifestWriter struct {
-       IKeepClient
-       stripPrefix string
-       Streams     map[string]*ManifestStreamWriter
-}
-
-func (m *ManifestWriter) WalkFunc(path string, info os.FileInfo, err error) error {
-       if err != nil {
-               return err
-       }
-
-       targetPath, targetInfo := path, info
-       if info.Mode()&os.ModeSymlink != 0 {
-               // Update targetpath/info to reflect the symlink
-               // target, not the symlink itself
-               targetPath, err = filepath.EvalSymlinks(path)
-               if err != nil {
-                       return err
-               }
-               targetInfo, err = os.Stat(targetPath)
-               if err != nil {
-                       return fmt.Errorf("stat symlink %q target %q: %s", path, targetPath, err)
-               }
-       }
-
-       if targetInfo.Mode()&os.ModeType != 0 {
-               // Skip directories, pipes, other non-regular files
-               return nil
-       }
-
-       var dir string
-       if len(path) > (len(m.stripPrefix) + len(info.Name()) + 1) {
-               dir = path[len(m.stripPrefix)+1 : (len(path) - len(info.Name()) - 1)]
-       }
-       if dir == "" {
-               dir = "."
-       }
-
-       fn := path[(len(path) - len(info.Name())):]
-
-       if m.Streams[dir] == nil {
-               m.Streams[dir] = &ManifestStreamWriter{
-                       m,
-                       &manifest.ManifestStream{StreamName: dir},
-                       0,
-                       nil,
-                       make(chan *Block),
-                       make(chan []error)}
-               go m.Streams[dir].goUpload()
-       }
-
-       stream := m.Streams[dir]
-
-       fileStart := stream.offset
-
-       file, err := os.Open(path)
-       if err != nil {
-               return err
-       }
-
-       log.Printf("Uploading %v/%v (%v bytes)", dir, fn, info.Size())
-
-       var count int64
-       count, err = io.Copy(stream, file)
-       if err != nil {
-               return err
-       }
-
-       stream.offset += count
-
-       stream.ManifestStream.FileStreamSegments = append(stream.ManifestStream.FileStreamSegments,
-               manifest.FileStreamSegment{uint64(fileStart), uint64(count), fn})
-
-       return nil
-}
-
-func (m *ManifestWriter) Finish() error {
-       var errstring string
-       for _, stream := range m.Streams {
-               if stream.uploader == nil {
-                       continue
-               }
-               if stream.Block != nil {
-                       stream.uploader <- stream.Block
-               }
-               close(stream.uploader)
-               stream.uploader = nil
-
-               errors := <-stream.finish
-               close(stream.finish)
-               stream.finish = nil
-
-               for _, r := range errors {
-                       errstring = fmt.Sprintf("%v%v\n", errstring, r.Error())
-               }
-       }
-       if errstring != "" {
-               return errors.New(errstring)
-       } else {
-               return nil
-       }
-}
-
-func (m *ManifestWriter) ManifestText() string {
-       m.Finish()
-       var buf bytes.Buffer
-
-       dirs := make([]string, len(m.Streams))
-       i := 0
-       for k := range m.Streams {
-               dirs[i] = k
-               i++
-       }
-       sort.Strings(dirs)
-
-       for _, k := range dirs {
-               v := m.Streams[k]
-
-               if k == "." {
-                       buf.WriteString(".")
-               } else {
-                       k = strings.Replace(k, " ", "\\040", -1)
-                       k = strings.Replace(k, "\n", "", -1)
-                       buf.WriteString("./" + k)
-               }
-               for _, b := range v.Blocks {
-                       buf.WriteString(" ")
-                       buf.WriteString(b)
-               }
-               for _, f := range v.FileStreamSegments {
-                       buf.WriteString(" ")
-                       name := strings.Replace(f.Name, " ", "\\040", -1)
-                       name = strings.Replace(name, "\n", "", -1)
-                       buf.WriteString(fmt.Sprintf("%d:%d:%s", f.SegPos, f.SegLen, name))
-               }
-               buf.WriteString("\n")
-       }
-       return buf.String()
-}
-
-func WriteTree(kc IKeepClient, root string) (manifest string, err error) {
-       mw := ManifestWriter{kc, root, map[string]*ManifestStreamWriter{}}
-       err = filepath.Walk(root, mw.WalkFunc)
-
-       if err != nil {
-               return "", err
-       }
-
-       err = mw.Finish()
-       if err != nil {
-               return "", err
-       }
-
-       return mw.ManifestText(), nil
-}
diff --git a/sdk/go/crunchrunner/upload_test.go b/sdk/go/crunchrunner/upload_test.go
deleted file mode 100644 (file)
index 5bc7492..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package main
-
-import (
-       "crypto/md5"
-       "errors"
-       "fmt"
-       "io/ioutil"
-       "os"
-       "syscall"
-
-       . "gopkg.in/check.v1"
-)
-
-type UploadTestSuite struct{}
-
-// Gocheck boilerplate
-var _ = Suite(&UploadTestSuite{})
-
-type KeepTestClient struct {
-}
-
-func (k KeepTestClient) PutHB(hash string, buf []byte) (string, int, error) {
-       return fmt.Sprintf("%x+%v", md5.Sum(buf), len(buf)), len(buf), nil
-}
-
-func (s *TestSuite) TestSimpleUpload(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt\n")
-}
-
-func (s *TestSuite) TestSimpleUploadThreeFiles(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       for _, err := range []error{
-               ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600),
-               ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600),
-               os.Symlink("./file2.txt", tmpdir+"/file3.txt"),
-               syscall.Mkfifo(tmpdir+"/ignore.fifo", 0600),
-       } {
-               c.Assert(err, IsNil)
-       }
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, ". aa65a413921163458c52fea478d5d3ee+9 0:3:file1.txt 3:3:file2.txt 6:3:file3.txt\n")
-}
-
-func (s *TestSuite) TestSimpleUploadSubdir(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       os.Mkdir(tmpdir+"/subdir", 0700)
-
-       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
-       ioutil.WriteFile(tmpdir+"/subdir/file2.txt", []byte("bar"), 0600)
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
-./subdir 37b51d194a7513e45b56f6524f2d51f2+3 0:3:file2.txt
-`)
-}
-
-func (s *TestSuite) TestSimpleUploadLarge(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       file, _ := os.Create(tmpdir + "/" + "file1.txt")
-       data := make([]byte, 1024*1024-1)
-       for i := range data {
-               data[i] = byte(i % 10)
-       }
-       for i := 0; i < 65; i++ {
-               file.Write(data)
-       }
-       file.Close()
-
-       ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, ". 00ecf01e0d93385115c9f8bed757425d+67108864 485cd630387b6b1846fe429f261ea05f+1048514 0:68157375:file1.txt 68157375:3:file2.txt\n")
-}
-
-func (s *TestSuite) TestUploadEmptySubdir(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       os.Mkdir(tmpdir+"/subdir", 0700)
-
-       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
-`)
-}
-
-func (s *TestSuite) TestUploadEmptyFile(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte(""), 0600)
-
-       str, err := WriteTree(KeepTestClient{}, tmpdir)
-       c.Check(err, IsNil)
-       c.Check(str, Equals, `. d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1.txt
-`)
-}
-
-type KeepErrorTestClient struct {
-}
-
-func (k KeepErrorTestClient) PutHB(hash string, buf []byte) (string, int, error) {
-       return "", 0, errors.New("Failed!")
-}
-
-func (s *TestSuite) TestUploadError(c *C) {
-       tmpdir, _ := ioutil.TempDir("", "")
-       defer func() {
-               os.RemoveAll(tmpdir)
-       }()
-
-       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
-
-       str, err := WriteTree(KeepErrorTestClient{}, tmpdir)
-       c.Check(err, NotNil)
-       c.Check(str, Equals, "")
-}
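
The expected strings in the removed upload tests are Keep manifest lines: a stream name, one block locator per data block (md5 hex digest plus block size), and position:length:filename file segments. A small sketch that reproduces the single-file case from the removed TestSimpleUpload:

package main

import (
	"crypto/md5"
	"fmt"
)

func main() {
	data := []byte("foo")
	// Block locator: md5 hex digest of the block data, "+", block size.
	locator := fmt.Sprintf("%x+%d", md5.Sum(data), len(data))
	// One stream (".") with one file occupying bytes 0..3 of the block.
	manifest := fmt.Sprintf(". %s 0:%d:file1.txt\n", locator, len(data))
	fmt.Print(manifest)
	// Prints: . acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
}
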
index b17ed291807ab88de5948cfcdfaf6562bea5d009..a45775470a0ca49a731ba839803eb8b5f43be124 100644 (file)
@@ -42,32 +42,6 @@ from arvados._version import __version__
 logger = logging.getLogger('arvados.arv-run')
 logger.setLevel(logging.INFO)
 
-arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
-arvrun_parser.add_argument('--dry-run', action="store_true",
-                           help="Print out the pipeline that would be submitted and exit")
-arvrun_parser.add_argument('--local', action="store_true",
-                           help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str,
-                           help="Docker image to use, otherwise use instance default.")
-arvrun_parser.add_argument('--ignore-rcode', action="store_true",
-                           help="Commands that return non-zero return codes should not be considered failed.")
-arvrun_parser.add_argument('--no-reuse', action="store_true",
-                           help="Do not reuse past jobs.")
-arvrun_parser.add_argument('--no-wait', action="store_true",
-                           help="Do not wait and display logs after submitting command, just exit.")
-arvrun_parser.add_argument('--project-uuid', type=str,
-                           help="Parent project of the pipeline")
-arvrun_parser.add_argument('--git-dir', type=str, default="",
-                           help="Git repository passed to arv-crunch-job when using --local")
-arvrun_parser.add_argument('--repository', type=str, default="arvados",
-                           help="repository field of component, default 'arvados'")
-arvrun_parser.add_argument('--script-version', type=str, default="master",
-                           help="script_version field of component, default 'master'")
-arvrun_parser.add_argument('--version', action='version',
-                           version="%s %s" % (sys.argv[0], __version__),
-                           help='Print version and exit.')
-arvrun_parser.add_argument('args', nargs=argparse.REMAINDER)
-
 class ArvFile(object):
     def __init__(self, prefix, fn):
         self.prefix = prefix
@@ -270,164 +244,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
 
 
 def main(arguments=None):
-    args = arvrun_parser.parse_args(arguments)
-
-    if len(args.args) == 0:
-        arvrun_parser.print_help()
-        return
-
-    starting_args = args.args
-
-    reading_into = 2
-
-    # Parse the command arguments into 'slots'.
-    # All words following '>' are output arguments and are collected into slots[0].
-    # All words following '<' are input arguments and are collected into slots[1].
-    # slots[2..] store the parameters of each command in the pipeline.
-    #
-    # e.g. arv-run foo arg1 arg2 '|' bar arg3 arg4 '<' input1 input2 input3 '>' output.txt
-    # will be parsed into:
-    #   [['output.txt'],
-    #    ['input1', 'input2', 'input3'],
-    #    ['foo', 'arg1', 'arg2'],
-    #    ['bar', 'arg3', 'arg4']]
-    slots = [[], [], []]
-    for c in args.args:
-        if c.startswith('>'):
-            reading_into = 0
-            if len(c) > 1:
-                slots[reading_into].append(c[1:])
-        elif c.startswith('<'):
-            reading_into = 1
-            if len(c) > 1:
-                slots[reading_into].append(c[1:])
-        elif c == '|':
-            reading_into = len(slots)
-            slots.append([])
-        else:
-            slots[reading_into].append(c)
-
-    if slots[0] and len(slots[0]) > 1:
-        logger.error("Can only specify a single stdout file (run-command substitutions are permitted)")
-        return
-
-    if not args.dry_run:
-        api = arvados.api('v1')
-        if args.project_uuid:
-            project = args.project_uuid
-        else:
-            project = determine_project(os.getcwd(), api.users().current().execute()["uuid"])
-
-    # Identify input files.  Look at each parameter and test to see if there is
-    # a file by that name.  This uses 'patterns' to look for within
-    # command line arguments, such as --foo=file.txt or -lfile.txt
-    patterns = [re.compile("([^=]+=)(.*)"),
-                re.compile("(-[A-Za-z])(.+)")]
-    for j, command in enumerate(slots[1:]):
-        for i, a in enumerate(command):
-            if j > 0 and i == 0:
-                # j == 0 is stdin, j > 0 is commands
-                # always skip program executable (i == 0) in commands
-                pass
-            elif a.startswith('\\'):
-                # if it starts with a \ then don't do any interpretation
-                command[i] = a[1:]
-            else:
-                # See if it looks like a file
-                command[i] = statfile('', a)
-
-                # If a file named command[i] was found, it would now be an
-                # ArvFile or UploadFile.  If command[i] is a basestring, that
-                # means it doesn't correspond exactly to a file, so do some
-                # pattern matching.
-                if isinstance(command[i], basestring):
-                    for p in patterns:
-                        m = p.match(a)
-                        if m:
-                            command[i] = statfile(m.group(1), m.group(2))
-                            break
-
-    files = [c for command in slots[1:] for c in command if isinstance(c, UploadFile)]
-    if files:
-        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.retries, project=project)
-
-    for i in range(1, len(slots)):
-        slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
-
-    component = {
-        "script": "run-command",
-        "script_version": args.script_version,
-        "repository": args.repository,
-        "script_parameters": {
-        },
-        "runtime_constraints": {}
-    }
-
-    if args.docker_image:
-        component["runtime_constraints"]["docker_image"] = args.docker_image
-
-    task_foreach = []
-    group_parser = argparse.ArgumentParser()
-    group_parser.add_argument('-b', '--batch-size', type=int)
-    group_parser.add_argument('args', nargs=argparse.REMAINDER)
-
-    for s in range(2, len(slots)):
-        for i in range(0, len(slots[s])):
-            if slots[s][i] == '--':
-                inp = "input%i" % (s-2)
-                groupargs = group_parser.parse_args(slots[2][i+1:])
-                if groupargs.batch_size:
-                    component["script_parameters"][inp] = {"value": {"batch":groupargs.args, "size":groupargs.batch_size}}
-                    slots[s] = slots[s][0:i] + [{"foreach": inp, "command": "$(%s)" % inp}]
-                else:
-                    component["script_parameters"][inp] = groupargs.args
-                    slots[s] = slots[s][0:i] + ["$(%s)" % inp]
-                task_foreach.append(inp)
-                break
-            if slots[s][i] == '\--':
-                slots[s][i] = '--'
-
-    if slots[0]:
-        component["script_parameters"]["task.stdout"] = slots[0][0]
-    if slots[1]:
-        task_foreach.append("stdin")
-        component["script_parameters"]["stdin"] = slots[1]
-        component["script_parameters"]["task.stdin"] = "$(stdin)"
-
-    if task_foreach:
-        component["script_parameters"]["task.foreach"] = task_foreach
-
-    component["script_parameters"]["command"] = slots[2:]
-    if args.ignore_rcode:
-        component["script_parameters"]["task.ignore_rcode"] = args.ignore_rcode
-
-    pipeline = {
-        "name": "arv-run " + " | ".join([s[0] for s in slots[2:]]),
-        "description": "@" + " ".join(starting_args) + "@",
-        "components": {
-            "command": component
-        },
-        "state": "RunningOnClient" if args.local else "RunningOnServer"
-    }
-
-    if args.dry_run:
-        print(json.dumps(pipeline, indent=4))
-    else:
-        pipeline["owner_uuid"] = project
-        pi = api.pipeline_instances().create(body=pipeline, ensure_unique_name=True).execute()
-        logger.info("Running pipeline %s", pi["uuid"])
-
-        if args.local:
-            subprocess.call(["arv-run-pipeline-instance", "--instance", pi["uuid"], "--run-jobs-here"] + (["--no-reuse"] if args.no_reuse else []))
-        elif not args.no_wait:
-            ws.main(["--pipeline", pi["uuid"]])
-
-        pi = api.pipeline_instances().get(uuid=pi["uuid"]).execute()
-        logger.info("Pipeline is %s", pi["state"])
-        if "output_uuid" in pi["components"]["command"]:
-            logger.info("Output is %s", pi["components"]["command"]["output_uuid"])
-        else:
-            logger.info("No output")
+    raise Exception("Legacy arv-run removed.")
 
 if __name__ == '__main__':
     main()
diff --git a/sdk/python/bin/arv-run b/sdk/python/bin/arv-run
deleted file mode 100755 (executable)
index ebba201..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from arvados.commands.run import main
-main()
index 0fc2dde31b8a7851ff3db066062f068ebdbc9ff7..5130d187d3871a4db09a4c82f6c204137c23fd97 100644 (file)
@@ -40,7 +40,6 @@ setup(name='arvados-python-client',
           'bin/arv-federation-migrate',
           'bin/arv-normalize',
           'bin/arv-put',
-          'bin/arv-run',
           'bin/arv-ws'
       ],
       data_files=[
index a7b8bacdc340bcbd9c7286187fca1bfce3309d44..6010ee4bf73e0fc0278c672b41a20c0ecaa35532 100644 (file)
@@ -78,30 +78,14 @@ http {
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://keep-web;
+      proxy_set_header Host $http_host;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
 
       client_max_body_size 0;
       proxy_http_version 1.1;
       proxy_request_buffering off;
-
-      # Unlike other proxy sections, here we need to override the
-      # requested Host header and use proxy_redirect because of the
-      # way the test suite orchestrates services. Keep-web's "download
-      # only" behavior relies on the Host header matching a configured
-      # value, but when run_test_servers.py writes keep-web's command
-      # line, the keep-web-dl TLS port (which clients will connect to
-      # and include in their Host header) has not yet been assigned.
-      #
-      # In production, "proxy_set_header Host $http_host;
-      # proxy_redirect off;" works: keep-web's redirect URLs will
-      # match the request URL received by Nginx.
-      #
-      # Here, keep-web will issue redirects to https://download/ and
-      # Nginx will rewrite them.
-      #
-      proxy_set_header Host  download;
-      proxy_redirect https://download/ https://$host:{{KEEPWEBDLSSLPORT}}/;
     }
   }
   upstream ws {
index cde0cefb6e8aa23c723883100c054713eb671458..e79b4843a268049e117d60e59a37a8de62e61fbb 100644 (file)
@@ -605,14 +605,9 @@ def run_keep_web():
 
     keepwebport = internal_port_from_config("WebDAV")
     env = os.environ.copy()
-    env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
     logf = open(_logfilename('keep-web'), 'a')
     keepweb = subprocess.Popen(
-        ['keep-web',
-         '-allow-anonymous',
-         '-attachment-only-host=download',
-         '-management-token=e687950a23c3a9bceec28c6223a06c79',
-         '-listen=:'+str(keepwebport)],
+        ['keep-web'],
         env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('keep-web'), 'w') as f:
         f.write(str(keepweb.pid))
@@ -746,7 +741,13 @@ def setup_config():
                 "TLS": {
                     "Insecure": True
                 },
-                "Services": services
+                "Services": services,
+                "Users": {
+                    "AnonymousUserToken": auth_token('anonymous')
+                },
+                "Collections": {
+                    "TrustAllContent": True
+                }
             }
         }
     }
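
keep-web now takes the anonymous token and content-trust settings from the cluster configuration rather than command-line flags. A rough sketch of reading the same values in Go via the GetCluster helper shown earlier; the Users.AnonymousUserToken field name is inferred from the JSON keys above and should be treated as an assumption, and the cluster ID and token are placeholders:

package main

import (
	"fmt"

	"git.curoverse.com/arvados.git/sdk/go/arvados"
)

func main() {
	// Illustrative in-memory config; a real service loads this from the
	// cluster config file instead of constructing it by hand.
	cfg := arvados.Config{Clusters: map[string]arvados.Cluster{
		"zzzzz": {ClusterID: "zzzzz"},
	}}

	cluster, err := cfg.GetCluster("zzzzz")
	if err != nil {
		panic(err)
	}
	// Field names mirror the JSON written by setup_config(); AnonymousUserToken
	// in particular is an assumption about the Go struct, not confirmed above.
	cluster.Users.AnonymousUserToken = "anonymous-token-placeholder"
	cluster.Collections.TrustAllContent = true

	fmt.Println(cluster.Users.AnonymousUserToken, cluster.Collections.TrustAllContent)
}
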
diff --git a/sdk/python/tests/test_arv_run.py b/sdk/python/tests/test_arv_run.py
deleted file mode 100644 (file)
index 1afc120..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import absolute_import
-import os
-import sys
-import tempfile
-import unittest
-
-import arvados.commands.run as arv_run
-from . import arvados_testutil as tutil
-
-class ArvRunTestCase(unittest.TestCase, tutil.VersionChecker):
-    def run_arv_run(self, args):
-        sys.argv = ['arv-run'] + args
-        return arv_run.main()
-
-    def test_unsupported_arg(self):
-        with self.assertRaises(SystemExit):
-            self.run_arv_run(['-x=unknown'])
-
-    def test_version_argument(self):
-        with tutil.redirected_streams(
-                stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
-            with self.assertRaises(SystemExit):
-                self.run_arv_run(['--version'])
-        self.assertVersionOutput(out, err)
diff --git a/sdk/python/tests/test_pipeline_template.py b/sdk/python/tests/test_pipeline_template.py
deleted file mode 100644 (file)
index 88138f3..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import absolute_import
-# usage example:
-#
-# ARVADOS_API_TOKEN=abc ARVADOS_API_HOST=arvados.local python -m unittest discover
-
-import unittest
-import arvados
-import apiclient
-from . import run_test_server
-
-class PipelineTemplateTest(run_test_server.TestCaseWithServers):
-    MAIN_SERVER = {}
-    KEEP_SERVER = {}
-
-    def runTest(self):
-        run_test_server.authorize_with("admin")
-        pt_uuid = arvados.api('v1').pipeline_templates().create(
-            body={'name':__file__}
-            ).execute()['uuid']
-        self.assertEqual(len(pt_uuid), 27,
-                         'Unexpected format of pipeline template UUID ("%s")'
-                         % pt_uuid)
-        components = {
-            'x': 'x',
-            '-x-': [1,2,{'foo':'bar'}],
-            'Boggis': {'Bunce': '[\'Bean\']'},
-            'SpassBox': True,
-            'spass_box': False,
-            'spass-box': [True, 'Maybe', False]
-            }
-        update_response = arvados.api('v1').pipeline_templates().update(
-            uuid=pt_uuid,
-            body={'components':components}
-            ).execute()
-        self.assertEqual('uuid' in update_response, True,
-                         'update() response did not include a uuid')
-        self.assertEqual(update_response['uuid'], pt_uuid,
-                         'update() response has a different uuid (%s, not %s)'
-                         % (update_response['uuid'], pt_uuid))
-        self.assertEqual(update_response['name'], __file__,
-                         'update() response has a different name (%s, not %s)'
-                         % (update_response['name'], __file__))
-        get_response = arvados.api('v1').pipeline_templates().get(
-            uuid=pt_uuid
-            ).execute()
-        self.assertEqual(get_response['components'], components,
-                         'components got munged by server (%s -> %s)'
-                         % (components, update_response['components']))
-        delete_response = arvados.api('v1').pipeline_templates().delete(
-            uuid=pt_uuid
-            ).execute()
-        self.assertEqual(delete_response['uuid'], pt_uuid,
-                         'delete() response has wrong uuid (%s, not %s)'
-                         % (delete_response['uuid'], pt_uuid))
-        with self.assertRaises(apiclient.errors.HttpError):
-            geterror_response = arvados.api('v1').pipeline_templates().get(
-                uuid=pt_uuid
-                ).execute()
index b9c87a68f2d9f17e7893f6689e106a0132e65c5f..76c62cb0ce9a5c5424db155bfec1c2ace5ac6df8 100644 (file)
@@ -76,34 +76,3 @@ class CurrentJobTestCase(ApiClientRetryTestMixin, unittest.TestCase):
 
     def run_method(self):
         arvados.current_job()
-
-
-class CurrentTaskTestCase(ApiClientRetryTestMixin, unittest.TestCase):
-
-    DEFAULT_EXCEPTION = arvados.errors.ApiError
-
-    def setUp(self):
-        super(CurrentTaskTestCase, self).setUp()
-        os.environ['TASK_UUID'] = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
-        os.environ['TASK_WORK'] = '.'
-
-    def tearDown(self):
-        del os.environ['TASK_UUID']
-        del os.environ['TASK_WORK']
-        arvados._current_task = None
-        super(CurrentTaskTestCase, self).tearDown()
-
-    def run_method(self):
-        arvados.current_task()
-
-
-class TaskSetOutputTestCase(CurrentTaskTestCase, unittest.TestCase):
-
-    DEFAULT_EXCEPTION = arvados.errors.ApiError
-
-    def tearDown(self):
-        super(TaskSetOutputTestCase, self).tearDown()
-        run_test_server.reset()
-
-    def run_method(self, locator=ApiClientRetryTestMixin.TEST_LOCATOR):
-        arvados.task_set_output({'uuid':self.TEST_UUID},s=locator)
index 07bbc33ab2f599245e68af092145f24130536761..b960d2e9e458d6c186b3f34a7b2f3db70e3faa1e 100644 (file)
@@ -4,4 +4,9 @@
 
 class Arvados::V1::JobTasksController < ApplicationController
   accept_attribute_as_json :parameters, Hash
+
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
 end
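
To make the effect of this stub concrete, here is a hedged sketch of what a Python SDK client would see when trying to create a job task against a server running this code; it assumes ARVADOS_API_HOST and ARVADOS_API_TOKEN are set, and follows the import style used in the SDK tests:

    # Illustrative sketch only: job_task creation is expected to be rejected
    # with HTTP 400 "Unsupported legacy jobs API" after this change.
    import arvados
    import apiclient

    api = arvados.api('v1')
    try:
        api.job_tasks().create(body={'parameters': {}}).execute()
    except apiclient.errors.HttpError as err:
        print("rejected as expected:", err.resp.status)  # expected: 400
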
index c3655272ddf69687899012b2da2dd1e293b06a4f..58a3fd168deed0d5615973fb1578cb619ed13abc 100644 (file)
@@ -13,115 +13,27 @@ class Arvados::V1::JobsController < ApplicationController
   include DbCurrentTime
 
   def create
-    [:repository, :script, :script_version, :script_parameters].each do |r|
-      if !resource_attrs[r]
-        return send_error("#{r} attribute must be specified",
-                          status: :unprocessable_entity)
-      end
-    end
-
-    # We used to ask for the minimum_, exclude_, and no_reuse params
-    # in the job resource. Now we advertise them as flags that alter
-    # the behavior of the create action.
-    [:minimum_script_version, :exclude_script_versions].each do |attr|
-      if resource_attrs.has_key? attr
-        params[attr] = resource_attrs.delete attr
-      end
-    end
-    if resource_attrs.has_key? :no_reuse
-      params[:find_or_create] = !resource_attrs.delete(:no_reuse)
-    end
-
-    return super if !params[:find_or_create]
-    return if !load_filters_param
-
-    begin
-      @object = Job.find_reusable(resource_attrs, params, @filters, @read_users)
-    rescue ArgumentError => error
-      return send_error(error.message)
-    end
-
-    if @object
-      show
-    else
-      super
-    end
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def cancel
-    reload_object_before_update
-    @object.cancel cascade: params[:cascade]
-    show
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def lock
-    @object.lock current_user.uuid
-    show
-  end
-
-  class LogStreamer
-    Q_UPDATE_INTERVAL = 12
-    def initialize(job, opts={})
-      @job = job
-      @opts = opts
-    end
-    def each
-      if @job.finished_at
-        yield "#{@job.uuid} finished at #{@job.finished_at}\n"
-        return
-      end
-      while not @job.started_at
-        # send a summary (job queue + available nodes) to the client
-        # every few seconds while waiting for the job to start
-        current_time = db_current_time
-        last_ack_at ||= current_time - Q_UPDATE_INTERVAL - 1
-        if current_time - last_ack_at >= Q_UPDATE_INTERVAL
-          nodes_in_state = {idle: 0, alloc: 0}
-          ActiveRecord::Base.uncached do
-            Node.where('hostname is not ?', nil).collect do |n|
-              if n.info[:slurm_state]
-                nodes_in_state[n.info[:slurm_state]] ||= 0
-                nodes_in_state[n.info[:slurm_state]] += 1
-              end
-            end
-          end
-          job_queue = Job.queue.select(:uuid)
-          n_queued_before_me = 0
-          job_queue.each do |j|
-            break if j.uuid == @job.uuid
-            n_queued_before_me += 1
-          end
-          yield "#{db_current_time}" \
-            " job #{@job.uuid}" \
-            " queue_position #{n_queued_before_me}" \
-            " queue_size #{job_queue.count}" \
-            " nodes_idle #{nodes_in_state[:idle]}" \
-            " nodes_alloc #{nodes_in_state[:alloc]}\n"
-          last_ack_at = db_current_time
-        end
-        sleep 3
-        ActiveRecord::Base.uncached do
-          @job.reload
-        end
-      end
-    end
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def queue
-    params[:order] ||= ['priority desc', 'created_at']
-    load_limit_offset_order_params
-    load_where_param
-    @where.merge!({state: Job::Queued})
-    return if !load_filters_param
-    find_objects_for_index
+    @objects = []
     index
   end
 
   def queue_size
-    # Users may not be allowed to see all the jobs in the queue, so provide a
-    # method to get just the queue size in order to get a gist of how busy the
-    # cluster is.
-    render :json => {:queue_size => Job.queue.size}
+    render :json => {:queue_size => 0}
   end
 
   def self._create_requires_parameters
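
Similarly, the surviving read-only queue endpoints keep responding but now always report an empty queue; a hedged Python SDK sketch (assuming the discovery document still advertises the jobs.queue and jobs.queue_size methods, and that API credentials are configured):

    # Illustrative sketch only: both calls should succeed but show no work.
    import arvados

    api = arvados.api('v1')
    print(api.jobs().queue().execute()['items'])   # expected: []
    print(api.jobs().queue_size().execute())       # expected: {'queue_size': 0}
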
index baffda1c99b96ad72e81879c1fff9d124ef2635c..166f71049b249606f3667045b07d2bea1c17639e 100644 (file)
@@ -7,9 +7,13 @@ class Arvados::V1::PipelineInstancesController < ApplicationController
   accept_attribute_as_json :properties, Hash
   accept_attribute_as_json :components_summary, Hash
 
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
+
   def cancel
-    reload_object_before_update
-    @object.cancel cascade: params[:cascade]
-    show
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 end
index a276948d59de444ab0c13e9cdc97eaeca39b26d9..4a5e724ee64471df8bcd38db5e6e2193c05e244b 100644 (file)
@@ -4,4 +4,9 @@
 
 class Arvados::V1::PipelineTemplatesController < ApplicationController
   accept_attribute_as_json :components, Hash
+
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
 end
diff --git a/services/api/app/helpers/commit_ancestors_helper.rb b/services/api/app/helpers/commit_ancestors_helper.rb
deleted file mode 100644 (file)
index 6def2c9..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-module CommitAncestorsHelper
-end
index d44719f924aa31b27225d24eb5b5f534b5de4914..b1c02b2a76a1597f7caf0837cf609b2deffa1eac 100644 (file)
@@ -3,4 +3,267 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 module CommitsHelper
+
+  class GitError < RequestError
+    def http_status
+      422
+    end
+  end
+
+  def self.git_check_ref_format(e)
+    if !e or e.empty? or e[0] == '-' or e[0] == '$'
+      # definitely not valid
+      false
+    else
+      `git check-ref-format --allow-onelevel #{e.shellescape}`
+      $?.success?
+    end
+  end
+
+  # Return an array of commits (each a 40-char sha1) satisfying the
+  # given criteria.
+  #
+  # Return [] if the revisions given in minimum/maximum are invalid or
+  # don't exist in the given repository.
+  #
+  # Raise ArgumentError if the given repository is invalid, does not
+  # exist, or cannot be read for any reason. (Any transient error that
+  # prevents commit ranges from resolving must raise rather than
+  # returning an empty array.)
+  #
+  # repository can be the name of a locally hosted repository or a git
+  # URL (see git-fetch(1)). Currently http, https, and git schemes are
+  # supported.
+  def self.find_commit_range repository, minimum, maximum, exclude
+    if minimum and minimum.empty?
+      minimum = nil
+    end
+
+    if minimum and !git_check_ref_format(minimum)
+      Rails.logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
+      return []
+    end
+
+    if maximum and !git_check_ref_format(maximum)
+      Rails.logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
+      return []
+    end
+
+    if !maximum
+      maximum = "HEAD"
+    end
+
+    gitdir, is_remote = git_dir_for repository
+    fetch_remote_repository gitdir, repository if is_remote
+    ENV['GIT_DIR'] = gitdir
+
+    commits = []
+
+    # Get the commit hash for the upper bound
+    max_hash = nil
+    git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
+    IO.foreach("|#{git_max_hash_cmd}") do |line|
+      max_hash = line.strip
+    end
+
+    # If not found, nothing else to do
+    if !max_hash
+      Rails.logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
+      return []
+    end
+
+    # If string is invalid, nothing else to do
+    if !git_check_ref_format(max_hash)
+      Rails.logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
+      return []
+    end
+
+    resolved_exclude = nil
+    if exclude
+      resolved_exclude = []
+      exclude.each do |e|
+        if git_check_ref_format(e)
+          IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
+            resolved_exclude.push(line.strip)
+          end
+        else
+          Rails.logger.warn "find_commit_range called with invalid characters in exclude: '#{exclude}'"
+          return []
+        end
+      end
+    end
+
+    if minimum
+      # Get the commit hash for the lower bound
+      min_hash = nil
+      git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
+      IO.foreach("|#{git_min_hash_cmd}") do |line|
+        min_hash = line.strip
+      end
+
+      # If not found, nothing else to do
+      if !min_hash
+        Rails.logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
+        return []
+      end
+
+      # If string is invalid, nothing else to do
+      if !git_check_ref_format(min_hash)
+        Rails.logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
+        return []
+      end
+
+      # Now find all commits between them
+      IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
+        hash = line.strip
+        commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
+      end
+
+      commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
+    else
+      commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
+    end
+
+    commits
+  end
+
+  # Given a repository (url, or name of hosted repo) and commit sha1,
+  # copy the commit into the internal git repo (if necessary), and tag
+  # it with the given tag (typically a job UUID).
+  #
+  # The repo can be a remote url, but in this case sha1 must already
+  # be present in our local cache for that repo: e.g., sha1 was just
+  # returned by find_commit_range.
+  def self.tag_in_internal_repository repo_name, sha1, tag
+    unless git_check_ref_format tag
+      raise ArgumentError.new "invalid tag #{tag}"
+    end
+    unless /^[0-9a-f]{40}$/ =~ sha1
+      raise ArgumentError.new "invalid sha1 #{sha1}"
+    end
+    src_gitdir, _ = git_dir_for repo_name
+    unless src_gitdir
+      raise ArgumentError.new "no local repository for #{repo_name}"
+    end
+    dst_gitdir = Rails.configuration.Containers.JobsAPI.GitInternalDir
+
+    begin
+      commit_in_dst = must_git(dst_gitdir, "log -n1 --format=%H #{sha1.shellescape}^{commit}").strip
+    rescue GitError
+      commit_in_dst = false
+    end
+
+    tag_cmd = "tag --force #{tag.shellescape} #{sha1.shellescape}^{commit}"
+    if commit_in_dst == sha1
+      must_git(dst_gitdir, tag_cmd)
+    else
+      # git-fetch is faster than pack-objects|unpack-objects, but
+      # git-fetch can't fetch by sha1. So we first try to fetch a
+      # branch that has the desired commit, and if that fails (there
+      # is no such branch, or the branch we choose changes under us in
+      # race), we fall back to pack|unpack.
+      begin
+        branches = must_git(src_gitdir,
+                            "branch --contains #{sha1.shellescape}")
+        m = branches.match(/^. (\w+)\n/)
+        if !m
+          raise GitError.new "commit is not on any branch"
+        end
+        branch = m[1]
+        must_git(dst_gitdir,
+                 "fetch file://#{src_gitdir.shellescape} #{branch.shellescape}")
+        # Even if all of the above steps succeeded, we might still not
+        # have the right commit due to a race, in which case tag_cmd
+        # will fail, and we'll need to fall back to pack|unpack. So
+        # don't be tempted to condense this tag_cmd and the one in the
+        # rescue block into a single attempt.
+        must_git(dst_gitdir, tag_cmd)
+      rescue GitError
+        must_pipe("echo #{sha1.shellescape}",
+                  "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+                  "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+        must_git(dst_gitdir, tag_cmd)
+      end
+    end
+  end
+
+  protected
+
+  def self.remote_url? repo_name
+    /^(https?|git):\/\// =~ repo_name
+  end
+
+  # Return [local_git_dir, is_remote]. If is_remote, caller must use
+  # fetch_remote_repository to ensure content is up-to-date.
+  #
+  # Raises an exception if the latest content could not be fetched for
+  # any reason.
+  def self.git_dir_for repo_name
+    if remote_url? repo_name
+      return [cache_dir_for(repo_name), true]
+    end
+    repos = Repository.readable_by(current_user).where(name: repo_name)
+    if repos.count == 0
+      raise ArgumentError.new "Repository not found: '#{repo_name}'"
+    elsif repos.count > 1
+      Rails.logger.error "Multiple repositories with name=='#{repo_name}'!"
+      raise ArgumentError.new "Name conflict"
+    else
+      return [repos.first.server_path, false]
+    end
+  end
+
+  def self.cache_dir_for git_url
+    File.join(cache_dir_base, Digest::SHA1.hexdigest(git_url) + ".git").to_s
+  end
+
+  def self.cache_dir_base
+    Rails.root.join 'tmp', 'git-cache'
+  end
+
+  def self.fetch_remote_repository gitdir, git_url
+    # Caller decides which protocols are worth using. This is just a
+    # safety check to ensure we never use urls like "--flag" or wander
+    # into git's hardlink features by using bare "/path/foo" instead
+    # of "file:///path/foo".
+    unless /^[a-z]+:\/\// =~ git_url
+      raise ArgumentError.new "invalid git url #{git_url}"
+    end
+    begin
+      must_git gitdir, "branch"
+    rescue GitError => e
+      raise unless /Not a git repository/i =~ e.to_s
+      # OK, this just means we need to create a blank cache repository
+      # before fetching.
+      FileUtils.mkdir_p gitdir
+      must_git gitdir, "init"
+    end
+    must_git(gitdir,
+             "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
+  end
+
+  def self.must_git gitdir, *cmds
+    # Clear token in case a git helper tries to use it as a password.
+    orig_token = ENV['ARVADOS_API_TOKEN']
+    ENV['ARVADOS_API_TOKEN'] = ''
+    last_output = ''
+    begin
+      git = "git --git-dir #{gitdir.shellescape}"
+      cmds.each do |cmd|
+        last_output = must_pipe git+" "+cmd
+      end
+    ensure
+      ENV['ARVADOS_API_TOKEN'] = orig_token
+    end
+    return last_output
+  end
+
+  def self.must_pipe *cmds
+    cmd = cmds.join(" 2>&1 |") + " 2>&1"
+    out = IO.read("| </dev/null #{cmd}")
+    if not $?.success?
+      raise GitError.new "#{cmd}: #{$?}: #{out}"
+    end
+    return out
+  end
 end
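
As an aside on the remote-repository cache layout implemented by cache_dir_for above, the cache path is simply a SHA-1 of the git URL under tmp/git-cache; the same computation, sketched in Python purely for illustration (the base directory stands in for Rails.root/tmp/git-cache):

    # Illustrative sketch only: cache directory naming for remote repositories,
    # i.e. tmp/git-cache/<sha1-of-url>.git
    import hashlib
    import os

    def cache_dir_for(git_url, base="tmp/git-cache"):
        return os.path.join(base, hashlib.sha1(git_url.encode()).hexdigest() + ".git")

    print(cache_dir_for("https://github.com/arvados/arvados.git"))
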
diff --git a/services/api/app/models/commit.rb b/services/api/app/models/commit.rb
deleted file mode 100644 (file)
index 2f7e9cd..0000000
+++ /dev/null
@@ -1,272 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'request_error'
-
-class Commit < ActiveRecord::Base
-  extend CurrentApiClient
-
-  class GitError < RequestError
-    def http_status
-      422
-    end
-  end
-
-  def self.git_check_ref_format(e)
-    if !e or e.empty? or e[0] == '-' or e[0] == '$'
-      # definitely not valid
-      false
-    else
-      `git check-ref-format --allow-onelevel #{e.shellescape}`
-      $?.success?
-    end
-  end
-
-  # Return an array of commits (each a 40-char sha1) satisfying the
-  # given criteria.
-  #
-  # Return [] if the revisions given in minimum/maximum are invalid or
-  # don't exist in the given repository.
-  #
-  # Raise ArgumentError if the given repository is invalid, does not
-  # exist, or cannot be read for any reason. (Any transient error that
-  # prevents commit ranges from resolving must raise rather than
-  # returning an empty array.)
-  #
-  # repository can be the name of a locally hosted repository or a git
-  # URL (see git-fetch(1)). Currently http, https, and git schemes are
-  # supported.
-  def self.find_commit_range repository, minimum, maximum, exclude
-    if minimum and minimum.empty?
-      minimum = nil
-    end
-
-    if minimum and !git_check_ref_format(minimum)
-      logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
-      return []
-    end
-
-    if maximum and !git_check_ref_format(maximum)
-      logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
-      return []
-    end
-
-    if !maximum
-      maximum = "HEAD"
-    end
-
-    gitdir, is_remote = git_dir_for repository
-    fetch_remote_repository gitdir, repository if is_remote
-    ENV['GIT_DIR'] = gitdir
-
-    commits = []
-
-    # Get the commit hash for the upper bound
-    max_hash = nil
-    git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
-    IO.foreach("|#{git_max_hash_cmd}") do |line|
-      max_hash = line.strip
-    end
-
-    # If not found, nothing else to do
-    if !max_hash
-      logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
-      return []
-    end
-
-    # If string is invalid, nothing else to do
-    if !git_check_ref_format(max_hash)
-      logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
-      return []
-    end
-
-    resolved_exclude = nil
-    if exclude
-      resolved_exclude = []
-      exclude.each do |e|
-        if git_check_ref_format(e)
-          IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
-            resolved_exclude.push(line.strip)
-          end
-        else
-          logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
-          return []
-        end
-      end
-    end
-
-    if minimum
-      # Get the commit hash for the lower bound
-      min_hash = nil
-      git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
-      IO.foreach("|#{git_min_hash_cmd}") do |line|
-        min_hash = line.strip
-      end
-
-      # If not found, nothing else to do
-      if !min_hash
-        logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
-        return []
-      end
-
-      # If string is invalid, nothing else to do
-      if !git_check_ref_format(min_hash)
-        logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
-        return []
-      end
-
-      # Now find all commits between them
-      IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
-        hash = line.strip
-        commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
-      end
-
-      commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
-    else
-      commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
-    end
-
-    commits
-  end
-
-  # Given a repository (url, or name of hosted repo) and commit sha1,
-  # copy the commit into the internal git repo (if necessary), and tag
-  # it with the given tag (typically a job UUID).
-  #
-  # The repo can be a remote url, but in this case sha1 must already
-  # be present in our local cache for that repo: e.g., sha1 was just
-  # returned by find_commit_range.
-  def self.tag_in_internal_repository repo_name, sha1, tag
-    unless git_check_ref_format tag
-      raise ArgumentError.new "invalid tag #{tag}"
-    end
-    unless /^[0-9a-f]{40}$/ =~ sha1
-      raise ArgumentError.new "invalid sha1 #{sha1}"
-    end
-    src_gitdir, _ = git_dir_for repo_name
-    unless src_gitdir
-      raise ArgumentError.new "no local repository for #{repo_name}"
-    end
-    dst_gitdir = Rails.configuration.Containers.JobsAPI.GitInternalDir
-
-    begin
-      commit_in_dst = must_git(dst_gitdir, "log -n1 --format=%H #{sha1.shellescape}^{commit}").strip
-    rescue GitError
-      commit_in_dst = false
-    end
-
-    tag_cmd = "tag --force #{tag.shellescape} #{sha1.shellescape}^{commit}"
-    if commit_in_dst == sha1
-      must_git(dst_gitdir, tag_cmd)
-    else
-      # git-fetch is faster than pack-objects|unpack-objects, but
-      # git-fetch can't fetch by sha1. So we first try to fetch a
-      # branch that has the desired commit, and if that fails (there
-      # is no such branch, or the branch we choose changes under us in
-      # race), we fall back to pack|unpack.
-      begin
-        branches = must_git(src_gitdir,
-                            "branch --contains #{sha1.shellescape}")
-        m = branches.match(/^. (\w+)\n/)
-        if !m
-          raise GitError.new "commit is not on any branch"
-        end
-        branch = m[1]
-        must_git(dst_gitdir,
-                 "fetch file://#{src_gitdir.shellescape} #{branch.shellescape}")
-        # Even if all of the above steps succeeded, we might still not
-        # have the right commit due to a race, in which case tag_cmd
-        # will fail, and we'll need to fall back to pack|unpack. So
-        # don't be tempted to condense this tag_cmd and the one in the
-        # rescue block into a single attempt.
-        must_git(dst_gitdir, tag_cmd)
-      rescue GitError
-        must_pipe("echo #{sha1.shellescape}",
-                  "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
-                  "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
-        must_git(dst_gitdir, tag_cmd)
-      end
-    end
-  end
-
-  protected
-
-  def self.remote_url? repo_name
-    /^(https?|git):\/\// =~ repo_name
-  end
-
-  # Return [local_git_dir, is_remote]. If is_remote, caller must use
-  # fetch_remote_repository to ensure content is up-to-date.
-  #
-  # Raises an exception if the latest content could not be fetched for
-  # any reason.
-  def self.git_dir_for repo_name
-    if remote_url? repo_name
-      return [cache_dir_for(repo_name), true]
-    end
-    repos = Repository.readable_by(current_user).where(name: repo_name)
-    if repos.count == 0
-      raise ArgumentError.new "Repository not found: '#{repo_name}'"
-    elsif repos.count > 1
-      logger.error "Multiple repositories with name=='#{repo_name}'!"
-      raise ArgumentError.new "Name conflict"
-    else
-      return [repos.first.server_path, false]
-    end
-  end
-
-  def self.cache_dir_for git_url
-    File.join(cache_dir_base, Digest::SHA1.hexdigest(git_url) + ".git").to_s
-  end
-
-  def self.cache_dir_base
-    Rails.root.join 'tmp', 'git-cache'
-  end
-
-  def self.fetch_remote_repository gitdir, git_url
-    # Caller decides which protocols are worth using. This is just a
-    # safety check to ensure we never use urls like "--flag" or wander
-    # into git's hardlink features by using bare "/path/foo" instead
-    # of "file:///path/foo".
-    unless /^[a-z]+:\/\// =~ git_url
-      raise ArgumentError.new "invalid git url #{git_url}"
-    end
-    begin
-      must_git gitdir, "branch"
-    rescue GitError => e
-      raise unless /Not a git repository/i =~ e.to_s
-      # OK, this just means we need to create a blank cache repository
-      # before fetching.
-      FileUtils.mkdir_p gitdir
-      must_git gitdir, "init"
-    end
-    must_git(gitdir,
-             "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
-  end
-
-  def self.must_git gitdir, *cmds
-    # Clear token in case a git helper tries to use it as a password.
-    orig_token = ENV['ARVADOS_API_TOKEN']
-    ENV['ARVADOS_API_TOKEN'] = ''
-    last_output = ''
-    begin
-      git = "git --git-dir #{gitdir.shellescape}"
-      cmds.each do |cmd|
-        last_output = must_pipe git+" "+cmd
-      end
-    ensure
-      ENV['ARVADOS_API_TOKEN'] = orig_token
-    end
-    return last_output
-  end
-
-  def self.must_pipe *cmds
-    cmd = cmds.join(" 2>&1 |") + " 2>&1"
-    out = IO.read("| </dev/null #{cmd}")
-    if not $?.success?
-      raise GitError.new "#{cmd}: #{$?}: #{out}"
-    end
-    return out
-  end
-end
diff --git a/services/api/app/models/commit_ancestor.rb b/services/api/app/models/commit_ancestor.rb
deleted file mode 100644 (file)
index 59e8552..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# Usage:
-#
-# x = CommitAncestor.find_or_create_by_descendant_and_ancestor(a, b)
-# "b is an ancestor of a" if x.is
-#
-
-class CommitAncestor < ActiveRecord::Base
-  before_create :ask_git_whether_is
-
-  class CommitNotFoundError < ArgumentError
-  end
-
-  protected
-
-  def ask_git_whether_is
-    @gitdirbase = Rails.configuration.Git.Repositories
-    self.is = nil
-    Dir.foreach @gitdirbase do |repo|
-      next if repo.match(/^\./)
-      git_dir = repo.match(/\.git$/) ? repo : File.join(repo, '.git')
-      repo_name = repo.sub(/\.git$/, '')
-      ENV['GIT_DIR'] = File.join(@gitdirbase, git_dir)
-      IO.foreach("|git rev-list --format=oneline '#{self.descendant.gsub(/[^0-9a-f]/,"")}'") do |line|
-        self.is = false
-        sha1, _ = line.strip.split(" ", 2)
-        if sha1 == self.ancestor
-          self.is = true
-          break
-        end
-      end
-      if !self.is.nil?
-        self.repository_name = repo_name
-        break
-      end
-    end
-    if self.is.nil?
-      raise CommitNotFoundError.new "Specified commit was not found"
-    end
-  end
-end
index 4d63deb99cd1d236b348996228c0d3b036416e6e..37e5f455dffe73b61c783afc219913a6daf8313f 100644 (file)
@@ -1,6 +1,26 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: AGPL-3.0
+#
+#
+# Legacy jobs API aka crunch v1
+#
+# This is superseded by containers / container_requests (aka crunch v2).
+#
+# Arvados installations set up since the end of 2017 should never have
+# used the jobs API, and are unaffected by this change.
+#
+# So that older Arvados sites don't lose access to legacy records, the
+# API has been converted to read-only.  Creating and updating jobs
+# (and the related types job_task, pipeline_template and
+# pipeline_instance) is disabled, and much of the related business
+# logic has been removed, along with crunch-dispatch.rb and various
+# other code specific to the jobs API.
+#
+# If you need to resurrect any of this code, here is the last commit
+# on master before the branch that removed jobs API support:
+#
+# Wed Aug 7 14:49:38 2019 -0400 07d92519438a592d531f2c7558cd51788da262ca
 
 require 'log_reuse_info'
 require 'safe_json'
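
Since the comment above points to containers / container_requests (crunch v2) as the replacement for jobs, here is a hedged Python SDK sketch of submitting equivalent work that way; every field value below is a placeholder rather than something taken from this diff:

    # Illustrative sketch only: run a trivial command via a container request
    # instead of the removed jobs API. Image, command, mounts, and resources
    # are placeholders.
    import arvados

    api = arvados.api('v1')
    cr = api.container_requests().create(body={
        "name": "example container request",
        "state": "Committed",
        "container_image": "arvados/jobs",
        "command": ["echo", "hello"],
        "output_path": "/out",
        "mounts": {"/out": {"kind": "tmp"}},
        "runtime_constraints": {"vcpus": 1, "ram": 256 << 20},
    }).execute()
    print(cr["uuid"])
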
@@ -16,7 +36,6 @@ class Job < ArvadosModel
   serialize :runtime_constraints, Hash
   serialize :tasks_summary, Hash
   before_create :ensure_unique_submit_id
-  after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
   before_validation :set_priority
   before_validation :update_state_from_old_state_attrs
   before_validation :update_script_parameters_digest
@@ -28,8 +47,9 @@ class Job < ArvadosModel
   validate :ensure_no_collection_uuids_in_script_params
   before_save :tag_version_in_internal_repository
   before_save :update_timestamps_when_state_changes
+  before_create :create_disabled
+  before_update :update_disabled
 
-  has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
   has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid)
 
   class SubmitIdReused < RequestError
@@ -191,7 +211,7 @@ class Job < ArvadosModel
       else
         raise ArgumentError.new("unknown attribute for git filter: #{attr}")
       end
-      revisions = Commit.find_commit_range(filter["repository"],
+      revisions = CommitsHelper::find_commit_range(filter["repository"],
                                            filter["min_version"],
                                            filter["max_version"],
                                            filter["exclude_versions"])
@@ -207,144 +227,11 @@ class Job < ArvadosModel
     filters
   end
 
-  def self.find_reusable attrs, params, filters, read_users
-    if filters.empty?  # Translate older creation parameters into filters.
-      filters =
-        [["repository", "=", attrs[:repository]],
-         ["script", "=", attrs[:script]],
-         ["script_version", "not in git", params[:exclude_script_versions]],
-        ].reject { |filter| filter.last.nil? or filter.last.empty? }
-      if !params[:minimum_script_version].blank?
-        filters << ["script_version", "in git",
-                     params[:minimum_script_version]]
-      else
-        filters += default_git_filters("script_version", attrs[:repository],
-                                       attrs[:script_version])
-      end
-      if image_search = attrs[:runtime_constraints].andand["docker_image"]
-        if image_tag = attrs[:runtime_constraints]["docker_image_tag"]
-          image_search += ":#{image_tag}"
-        end
-        image_locator = Collection.
-          for_latest_docker_image(image_search).andand.portable_data_hash
-      else
-        image_locator = nil
-      end
-      filters << ["docker_image_locator", "=", image_locator]
-      if sdk_version = attrs[:runtime_constraints].andand["arvados_sdk_version"]
-        filters += default_git_filters("arvados_sdk_version", "arvados", sdk_version)
-      end
-      filters = load_job_specific_filters(attrs, filters, read_users)
-    end
-
-    # Check specified filters for some reasonableness.
-    filter_names = filters.map { |f| f.first }.uniq
-    ["repository", "script"].each do |req_filter|
-      if not filter_names.include?(req_filter)
-        return send_error("#{req_filter} filter required")
-      end
-    end
-
-    # Search for a reusable Job, and return it if found.
-    candidates = Job.readable_by(current_user)
-    log_reuse_info { "starting with #{candidates.count} jobs readable by current user #{current_user.uuid}" }
-
-    candidates = candidates.where(
-      'state = ? or (owner_uuid = ? and state in (?))',
-      Job::Complete, current_user.uuid, [Job::Queued, Job::Running])
-    log_reuse_info(candidates) { "after filtering on job state ((state=Complete) or (state=Queued/Running and (submitted by current user)))" }
-
-    digest = Job.sorted_hash_digest(attrs[:script_parameters])
-    candidates = candidates.where('script_parameters_digest = ?', digest)
-    log_reuse_info(candidates) { "after filtering on script_parameters_digest #{digest}" }
-
-    candidates = candidates.where('nondeterministic is distinct from ?', true)
-    log_reuse_info(candidates) { "after filtering on !nondeterministic" }
-
-    # prefer Running jobs over Queued
-    candidates = candidates.order('state desc, created_at')
-
-    candidates = apply_filters candidates, filters
-    log_reuse_info(candidates) { "after filtering on repo, script, and custom filters #{filters.inspect}" }
-
-    chosen = nil
-    chosen_output = nil
-    incomplete_job = nil
-    candidates.each do |j|
-      if j.state != Job::Complete
-        if !incomplete_job
-          # We'll use this if we don't find a job that has completed
-          log_reuse_info { "job #{j.uuid} is reusable, but unfinished; continuing search for completed jobs" }
-          incomplete_job = j
-        else
-          log_reuse_info { "job #{j.uuid} is unfinished and we already have #{incomplete_job.uuid}; ignoring" }
-        end
-      elsif chosen == false
-        # Ignore: we have already decided not to reuse any completed
-        # job.
-        log_reuse_info { "job #{j.uuid} with output #{j.output} ignored, see above" }
-      elsif j.output.nil?
-        log_reuse_info { "job #{j.uuid} has nil output" }
-      elsif j.log.nil?
-        log_reuse_info { "job #{j.uuid} has nil log" }
-      elsif Rails.configuration.Containers.JobsAPI.ReuseJobIfOutputsDiffer
-        if !Collection.readable_by(current_user).find_by_portable_data_hash(j.output)
-          # Ignore: keep looking for an incomplete job or one whose
-          # output is readable.
-          log_reuse_info { "job #{j.uuid} output #{j.output} unavailable to user; continuing search" }
-        elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.log)
-          # Ignore: keep looking for an incomplete job or one whose
-          # log is readable.
-          log_reuse_info { "job #{j.uuid} log #{j.log} unavailable to user; continuing search" }
-        else
-          log_reuse_info { "job #{j.uuid} with output #{j.output} is reusable; decision is final." }
-          return j
-        end
-      elsif chosen_output
-        if chosen_output != j.output
-          # If two matching jobs produced different outputs, run a new
-          # job (or use one that's already running/queued) instead of
-          # choosing one arbitrarily.
-          log_reuse_info { "job #{j.uuid} output #{j.output} disagrees; forgetting about #{chosen.uuid} and ignoring any other finished jobs (see reuse_job_if_outputs_differ in application.default.yml)" }
-          chosen = false
-        else
-          log_reuse_info { "job #{j.uuid} output #{j.output} agrees with chosen #{chosen.uuid}; continuing search in case other candidates have different outputs" }
-        end
-        # ...and that's the only thing we need to do once we've chosen
-        # a job to reuse.
-      elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.output)
-        # This user cannot read the output of this job. Any other
-        # completed job will have either the same output (making it
-        # unusable) or a different output (making it unusable because
-        # reuse_job_if_outputs_different is turned off). Therefore,
-        # any further investigation of reusable jobs is futile.
-        log_reuse_info { "job #{j.uuid} output #{j.output} is unavailable to user; this means no finished job can be reused (see reuse_job_if_outputs_differ in application.default.yml)" }
-        chosen = false
-      elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.log)
-        # This user cannot read the log of this job, don't try to reuse the
-        # job but consider if the output is consistent.
-        log_reuse_info { "job #{j.uuid} log #{j.log} is unavailable to user; continuing search" }
-        chosen_output = j.output
-      else
-        log_reuse_info { "job #{j.uuid} with output #{j.output} can be reused; continuing search in case other candidates have different outputs" }
-        chosen = j
-        chosen_output = j.output
-      end
-    end
-    j = chosen || incomplete_job
-    if j
-      log_reuse_info { "done, #{j.uuid} was selected" }
-    else
-      log_reuse_info { "done, nothing suitable" }
-    end
-    return j
-  end
-
   def self.default_git_filters(attr_name, repo_name, refspec)
     # Add a filter to @filters for `attr_name` = the latest commit available
     # in `repo_name` at `refspec`.  No filter is added if refspec can't be
     # resolved.
-    commits = Commit.find_commit_range(repo_name, nil, refspec, nil)
+    commits = CommitsHelper::find_commit_range(repo_name, nil, refspec, nil)
     if commit_hash = commits.first
       [[attr_name, "=", commit_hash]]
     else
@@ -353,36 +240,7 @@ class Job < ArvadosModel
   end
 
   def cancel(cascade: false, need_transaction: true)
-    if need_transaction
-      ActiveRecord::Base.transaction do
-        cancel(cascade: cascade, need_transaction: false)
-      end
-      return
-    end
-
-    if self.state.in?([Queued, Running])
-      self.state = Cancelled
-      self.save!
-    elsif self.state != Cancelled
-      raise InvalidStateTransitionError
-    end
-
-    return if !cascade
-
-    # cancel all children; they could be jobs or pipeline instances
-    children = self.components.andand.collect{|_, u| u}.compact
-
-    return if children.empty?
-
-    # cancel any child jobs
-    Job.where(uuid: children, state: [Queued, Running]).each do |job|
-      job.cancel(cascade: cascade, need_transaction: false)
-    end
-
-    # cancel any child pipelines
-    PipelineInstance.where(uuid: children, state: [PipelineInstance::RunningOnServer, PipelineInstance::RunningOnClient]).each do |pi|
-      pi.cancel(cascade: cascade, need_transaction: false)
-    end
+    raise "No longer supported"
   end
 
   protected
@@ -418,7 +276,7 @@ class Job < ArvadosModel
       return true
     end
     if new_record? or repository_changed? or script_version_changed?
-      sha1 = Commit.find_commit_range(repository,
+      sha1 = CommitsHelper::find_commit_range(repository,
                                       nil, script_version, nil).first
       if not sha1
         errors.add :script_version, "#{script_version} does not resolve to a commit"
@@ -443,7 +301,7 @@ class Job < ArvadosModel
       uuid_was = uuid
       begin
         assign_uuid
-        Commit.tag_in_internal_repository repository, script_version, uuid
+        CommitsHelper::tag_in_internal_repository repository, script_version, uuid
       rescue
         self.uuid = uuid_was
         raise
@@ -478,7 +336,7 @@ class Job < ArvadosModel
   def find_arvados_sdk_version
     resolve_runtime_constraint("arvados_sdk_version",
                                :arvados_sdk_version) do |git_search|
-      commits = Commit.find_commit_range("arvados",
+      commits = CommitsHelper::find_commit_range("arvados",
                                          nil, git_search, nil)
       if commits.empty?
         [false, "#{git_search} does not resolve to a commit"]
@@ -567,14 +425,6 @@ class Job < ArvadosModel
     super
   end
 
-  def trigger_crunch_dispatch_if_cancelled
-    if @need_crunch_dispatch_trigger
-      File.open(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger, 'wb') do
-        # That's all, just create/touch a file for crunch-job to see.
-      end
-    end
-  end
-
   def update_timestamps_when_state_changes
     return if not (state_changed? or new_record?)
 
@@ -703,4 +553,12 @@ class Job < ArvadosModel
     end
     false
   end
+
+  def create_disabled
+    raise "Disabled"
+  end
+
+  def update_disabled
+    raise "Disabled"
+  end
 end
index a960186502c2b410fe7abbe6c477a35690150bae..b181e76ccf33ca1e0923e2d8f566f64b2debad03 100644 (file)
@@ -9,6 +9,8 @@ class JobTask < ArvadosModel
   serialize :parameters, Hash
   before_create :set_default_qsequence
   after_update :delete_created_job_tasks_if_failed
+  before_create :create_disabled
+  before_update :update_disabled
 
   api_accessible :user, extend: :common do |t|
     t.add :job_uuid
@@ -35,4 +37,12 @@ class JobTask < ArvadosModel
     self.qsequence ||= self.class.connection.
       select_value("SELECT nextval('job_tasks_qsequence_seq')")
   end
+
+  def create_disabled
+    raise "Disabled"
+  end
+
+  def update_disabled
+    raise "Disabled"
+  end
 end
index c0781ef22856a6e181b21b0cb383cd7c8cfa366d..271b155aafa140b9ed7bef5d5d7dc5c8de43c549 100644 (file)
@@ -17,6 +17,8 @@ class PipelineInstance < ArvadosModel
   before_validation :update_timestamps_when_state_changes
   before_create :set_state_before_save
   before_save :set_state_before_save
+  before_create :create_disabled
+  before_update :update_disabled
 
   api_accessible :user, extend: :common do |t|
     t.add :pipeline_template_uuid
@@ -109,30 +111,7 @@ class PipelineInstance < ArvadosModel
   end
 
   def cancel(cascade: false, need_transaction: true)
-    if need_transaction
-      ActiveRecord::Base.transaction do
-        cancel(cascade: cascade, need_transaction: false)
-      end
-      return
-    end
-
-    if self.state.in?([RunningOnServer, RunningOnClient])
-      self.state = Paused
-      self.save!
-    elsif self.state != Paused
-      raise InvalidStateTransitionError
-    end
-
-    return if !cascade
-
-    # cancel all child jobs
-    children = self.components.andand.collect{|_, c| c['job']}.compact.collect{|j| j['uuid']}.compact
-
-    return if children.empty?
-
-    Job.where(uuid: children, state: [Job::Queued, Job::Running]).each do |job|
-      job.cancel(cascade: cascade, need_transaction: false)
-    end
+    raise "No longer supported"
   end
 
   protected
@@ -183,4 +162,12 @@ class PipelineInstance < ArvadosModel
     end
   end
 
+
+  def create_disabled
+    raise "Disabled"
+  end
+
+  def update_disabled
+    raise "Disabled"
+  end
 end
index b19a2b0f7c6c70f9b84a55d2eabeb5a73cd8ff34..7c694698e08edbccf5b390eccd453f1f2540c0ef 100644 (file)
@@ -3,6 +3,9 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 class PipelineTemplate < ArvadosModel
+  before_create :create_disabled
+  before_update :update_disabled
+
   include HasUuid
   include KindAndEtag
   include CommonApiTemplate
@@ -17,4 +20,12 @@ class PipelineTemplate < ArvadosModel
   def self.limit_index_columns_read
     ["components"]
   end
+
+  def create_disabled
+    raise "Disabled"
+  end
+
+  def update_disabled
+    raise "Disabled"
+  end
 end
index 847bee0483a37ac8fc755f64f1f74fea86d25d3d..09e54b9d4f4037656d76f5749e8ce959aed2848d 100644 (file)
@@ -146,12 +146,7 @@ arvcfg.declare_config "Containers.SLURM.Managed.ComputeNodeDomain", String, :com
 arvcfg.declare_config "Containers.SLURM.Managed.ComputeNodeNameservers", Hash, :compute_node_nameservers, ->(cfg, k, v) { arrayToHash cfg, "Containers.SLURM.Managed.ComputeNodeNameservers", v }
 arvcfg.declare_config "Containers.SLURM.Managed.AssignNodeHostname", String, :assign_node_hostname
 arvcfg.declare_config "Containers.JobsAPI.Enable", String, :enable_legacy_jobs_api, ->(cfg, k, v) { ConfigLoader.set_cfg cfg, "Containers.JobsAPI.Enable", v.to_s }
-arvcfg.declare_config "Containers.JobsAPI.CrunchJobWrapper", String, :crunch_job_wrapper
-arvcfg.declare_config "Containers.JobsAPI.CrunchJobUser", String, :crunch_job_user
-arvcfg.declare_config "Containers.JobsAPI.CrunchRefreshTrigger", String, :crunch_refresh_trigger
 arvcfg.declare_config "Containers.JobsAPI.GitInternalDir", String, :git_internal_dir
-arvcfg.declare_config "Containers.JobsAPI.ReuseJobIfOutputsDiffer", Boolean, :reuse_job_if_outputs_differ
-arvcfg.declare_config "Containers.JobsAPI.DefaultDockerImage", String, :default_docker_image_for_jobs
 arvcfg.declare_config "Mail.MailchimpAPIKey", String, :mailchimp_api_key
 arvcfg.declare_config "Mail.MailchimpListID", String, :mailchimp_list_id
 arvcfg.declare_config "Services.Controller.ExternalURL", URI
diff --git a/services/api/db/migrate/20190808145904_drop_commit_ancestors.rb b/services/api/db/migrate/20190808145904_drop_commit_ancestors.rb
new file mode 100644 (file)
index 0000000..974f3fe
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class DropCommitAncestors < ActiveRecord::Migration[5.0]
+  def change
+    drop_table :commit_ancestors
+  end
+end
diff --git a/services/api/db/migrate/20190809135453_remove_commits_table.rb b/services/api/db/migrate/20190809135453_remove_commits_table.rb
new file mode 100644 (file)
index 0000000..a3f450e
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class RemoveCommitsTable < ActiveRecord::Migration[5.0]
+  def change
+    drop_table :commits
+  end
+end
index 9bb059c2a93b619707fe439f5c4a60cf47826629..889ffa7486f96fda469545178ad51c4df702a117 100644 (file)
@@ -227,73 +227,6 @@ CREATE SEQUENCE public.collections_id_seq
 ALTER SEQUENCE public.collections_id_seq OWNED BY public.collections.id;
 
 
---
--- Name: commit_ancestors; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.commit_ancestors (
-    id integer NOT NULL,
-    repository_name character varying(255),
-    descendant character varying(255) NOT NULL,
-    ancestor character varying(255) NOT NULL,
-    "is" boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
-    updated_at timestamp without time zone NOT NULL
-);
-
-
---
--- Name: commit_ancestors_id_seq; Type: SEQUENCE; Schema: public; Owner: -
---
-
-CREATE SEQUENCE public.commit_ancestors_id_seq
-    START WITH 1
-    INCREMENT BY 1
-    NO MINVALUE
-    NO MAXVALUE
-    CACHE 1;
-
-
---
--- Name: commit_ancestors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
---
-
-ALTER SEQUENCE public.commit_ancestors_id_seq OWNED BY public.commit_ancestors.id;
-
-
---
--- Name: commits; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.commits (
-    id integer NOT NULL,
-    repository_name character varying(255),
-    sha1 character varying(255),
-    message character varying(255),
-    created_at timestamp without time zone NOT NULL,
-    updated_at timestamp without time zone NOT NULL
-);
-
-
---
--- Name: commits_id_seq; Type: SEQUENCE; Schema: public; Owner: -
---
-
-CREATE SEQUENCE public.commits_id_seq
-    START WITH 1
-    INCREMENT BY 1
-    NO MINVALUE
-    NO MAXVALUE
-    CACHE 1;
-
-
---
--- Name: commits_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
---
-
-ALTER SEQUENCE public.commits_id_seq OWNED BY public.commits.id;
-
-
 --
 -- Name: container_requests; Type: TABLE; Schema: public; Owner: -
 --
@@ -1267,20 +1200,6 @@ ALTER TABLE ONLY public.authorized_keys ALTER COLUMN id SET DEFAULT nextval('pub
 ALTER TABLE ONLY public.collections ALTER COLUMN id SET DEFAULT nextval('public.collections_id_seq'::regclass);
 
 
---
--- Name: commit_ancestors id; Type: DEFAULT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.commit_ancestors ALTER COLUMN id SET DEFAULT nextval('public.commit_ancestors_id_seq'::regclass);
-
-
---
--- Name: commits id; Type: DEFAULT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.commits ALTER COLUMN id SET DEFAULT nextval('public.commits_id_seq'::regclass);
-
-
 --
 -- Name: container_requests id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -1461,22 +1380,6 @@ ALTER TABLE ONLY public.collections
     ADD CONSTRAINT collections_pkey PRIMARY KEY (id);
 
 
---
--- Name: commit_ancestors commit_ancestors_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.commit_ancestors
-    ADD CONSTRAINT commit_ancestors_pkey PRIMARY KEY (id);
-
-
---
--- Name: commits commits_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.commits
-    ADD CONSTRAINT commits_pkey PRIMARY KEY (id);
-
-
 --
 -- Name: container_requests container_requests_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -1917,20 +1820,6 @@ CREATE INDEX index_collections_on_trash_at ON public.collections USING btree (tr
 CREATE UNIQUE INDEX index_collections_on_uuid ON public.collections USING btree (uuid);
 
 
---
--- Name: index_commit_ancestors_on_descendant_and_ancestor; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE UNIQUE INDEX index_commit_ancestors_on_descendant_and_ancestor ON public.commit_ancestors USING btree (descendant, ancestor);
-
-
---
--- Name: index_commits_on_repository_name_and_sha1; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE UNIQUE INDEX index_commits_on_repository_name_and_sha1 ON public.commits USING btree (repository_name, sha1);
-
-
 --
 -- Name: index_container_requests_on_container_uuid; Type: INDEX; Schema: public; Owner: -
 --
@@ -3125,6 +3014,8 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20190214214814'),
 ('20190322174136'),
 ('20190422144631'),
-('20190523180148');
+('20190523180148'),
+('20190808145904'),
+('20190809135453');
 
 
index 47e1e4bb178f2d0bd3f6d99dec1c23458bcd2ff0..1d5891ed62832e4fc761e7005122f5d2b0785fcd 100644 (file)
@@ -6,9 +6,9 @@ fpm_depends+=('git >= 1.7.10')
 
 case "$TARGET" in
     centos*)
-        fpm_depends+=(libcurl-devel postgresql-devel arvados-server)
+        fpm_depends+=(libcurl-devel postgresql-devel)
         ;;
     debian* | ubuntu*)
-        fpm_depends+=(libcurl-ssl-dev libpq-dev g++ arvados-server)
+        fpm_depends+=(libcurl-ssl-dev libpq-dev g++)
         ;;
 esac
index a970383b8714653e9621def26a404cc7e10bf48b..6f30f5ae33801afabd6c6a5e3fa6805c05e628fa 100644 (file)
@@ -17,6 +17,8 @@ module CanBeAnOwner
       next if t == 'schema_migrations'
       next if t == 'permission_refresh_lock'
       next if t == 'ar_internal_metadata'
+      next if t == 'commit_ancestors'
+      next if t == 'commits'
       klass = t.classify.constantize
       next unless klass and 'owner_uuid'.in?(klass.columns.collect(&:name))
       base.has_many(t.to_sym,
diff --git a/services/api/lib/crunch_dispatch.rb b/services/api/lib/crunch_dispatch.rb
deleted file mode 100644 (file)
index 4e64018..0000000
+++ /dev/null
@@ -1,981 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'open3'
-require 'shellwords'
-
-class CrunchDispatch
-  extend DbCurrentTime
-  include ApplicationHelper
-  include Process
-
-  EXIT_TEMPFAIL = 75
-  EXIT_RETRY_UNLOCKED = 93
-  RETRY_UNLOCKED_LIMIT = 3
-
-  class LogTime < Time
-    def to_s
-      self.utc.strftime "%Y-%m-%d_%H:%M:%S"
-    end
-  end
-
-  def initialize
-    @crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
-    if @crunch_job_bin.empty?
-      raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
-    end
-
-    @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
-    @docker_run_args = ENV['CRUNCH_JOB_DOCKER_RUN_ARGS']
-    @cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
-    @srun_sync_timeout = ENV['CRUNCH_SRUN_SYNC_TIMEOUT']
-
-    @arvados_internal = Rails.configuration.Containers.JobsAPI.GitInternalDir
-    if not File.exist? @arvados_internal
-      $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
-      raise "No internal git repository available" unless ($? == 0)
-    end
-
-    @repo_root = Rails.configuration.Git.Repositories
-    @arvados_repo_path = Repository.where(name: "arvados").first.server_path
-    @authorizations = {}
-    @did_recently = {}
-    @fetched_commits = {}
-    @git_tags = {}
-    @node_state = {}
-    @pipe_auth_tokens = {}
-    @running = {}
-    @todo = []
-    @todo_job_retries = {}
-    @job_retry_counts = Hash.new(0)
-    @todo_pipelines = []
-  end
-
-  def sysuser
-    return act_as_system_user
-  end
-
-  def refresh_todo
-    if @runoptions[:jobs]
-      @todo = @todo_job_retries.values + Job.queue.select(&:repository)
-    end
-    if @runoptions[:pipelines]
-      @todo_pipelines = PipelineInstance.queue
-    end
-  end
-
-  def each_slurm_line(cmd, outfmt, max_fields=nil)
-    max_fields ||= outfmt.split(":").size
-    max_fields += 1  # To accommodate the node field we add
-    @@slurm_version ||= Gem::Version.new(`sinfo --version`.match(/\b[\d\.]+\b/)[0])
-    if Gem::Version.new('2.3') <= @@slurm_version
-      `#{cmd} --noheader -o '%n:#{outfmt}'`.each_line do |line|
-        yield line.chomp.split(":", max_fields)
-      end
-    else
-      # Expand rows with hostname ranges (like "foo[1-3,5,9-12]:idle")
-      # into multiple rows with one hostname each.
-      `#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
-        tokens = line.chomp.split(":", max_fields)
-        if (re = tokens[0].match(/^(.*?)\[([-,\d]+)\]$/))
-          tokens.shift
-          re[2].split(",").each do |range|
-            range = range.split("-").collect(&:to_i)
-            (range[0]..range[-1]).each do |n|
-              yield [re[1] + n.to_s] + tokens
-            end
-          end
-        else
-          yield tokens
-        end
-      end
-    end
-  end
-
-  def slurm_status
-    slurm_nodes = {}
-    each_slurm_line("sinfo", "%t") do |hostname, state|
-      # Treat nodes in idle* state as down, because the * means that slurm
-      # hasn't been able to communicate with it recently.
-      state.sub!(/^idle\*/, "down")
-      state.sub!(/\W+$/, "")
-      state = "down" unless %w(idle alloc comp mix drng down).include?(state)
-      slurm_nodes[hostname] = {state: state, job: nil}
-    end
-    each_slurm_line("squeue", "%j") do |hostname, job_uuid|
-      slurm_nodes[hostname][:job] = job_uuid if slurm_nodes[hostname]
-    end
-    slurm_nodes
-  end
-
-  def update_node_status
-    return unless Rails.configuration.Containers.JobsAPI.CrunchJobWrapper.to_s.match(/^slurm/)
-    slurm_status.each_pair do |hostname, slurmdata|
-      next if @node_state[hostname] == slurmdata
-      begin
-        node = Node.where('hostname=?', hostname).order(:last_ping_at).last
-        if node
-          $stderr.puts "dispatch: update #{hostname} state to #{slurmdata}"
-          node.info["slurm_state"] = slurmdata[:state]
-          node.job_uuid = slurmdata[:job]
-          if node.save
-            @node_state[hostname] = slurmdata
-          else
-            $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
-          end
-        elsif slurmdata[:state] != 'down'
-          $stderr.puts "dispatch: SLURM reports '#{hostname}' is not down, but no node has that name"
-        end
-      rescue => error
-        $stderr.puts "dispatch: error updating #{hostname} node status: #{error}"
-      end
-    end
-  end
-
-  def positive_int(raw_value, default=nil)
-    value = begin raw_value.to_i rescue 0 end
-    if value > 0
-      value
-    else
-      default
-    end
-  end
-
-  NODE_CONSTRAINT_MAP = {
-    # Map Job runtime_constraints keys to the corresponding Node info key.
-    'min_ram_mb_per_node' => 'total_ram_mb',
-    'min_scratch_mb_per_node' => 'total_scratch_mb',
-    'min_cores_per_node' => 'total_cpu_cores',
-  }
-
-  def nodes_available_for_job_now(job)
-    # Find Nodes that satisfy a Job's runtime constraints (by building
-    # a list of Procs and using them to test each Node).  If there
-    # enough to run the Job, return an array of their names.
-    # Otherwise, return nil.
-    need_procs = NODE_CONSTRAINT_MAP.each_pair.map do |job_key, node_key|
-      Proc.new do |node|
-        positive_int(node.properties[node_key], 0) >=
-          positive_int(job.runtime_constraints[job_key], 0)
-      end
-    end
-    min_node_count = positive_int(job.runtime_constraints['min_nodes'], 1)
-    usable_nodes = []
-    Node.all.select do |node|
-      node.info['slurm_state'] == 'idle'
-    end.sort_by do |node|
-      # Prefer nodes with no price, then cheap nodes, then expensive nodes
-      node.properties['cloud_node']['price'].to_f rescue 0
-    end.each do |node|
-      if need_procs.select { |need_proc| not need_proc.call(node) }.any?
-        # At least one runtime constraint is not satisfied by this node
-        next
-      end
-      usable_nodes << node
-      if usable_nodes.count >= min_node_count
-        hostnames = usable_nodes.map(&:hostname)
-        log_nodes = usable_nodes.map do |n|
-          "#{n.hostname} #{n.uuid} #{n.properties.to_json}"
-        end
-        log_job = "#{job.uuid} #{job.runtime_constraints}"
-        log_text = "dispatching job #{log_job} to #{log_nodes.join(", ")}"
-        $stderr.puts log_text
-        begin
-          act_as_system_user do
-            Log.new(object_uuid: job.uuid,
-                    event_type: 'dispatch',
-                    owner_uuid: system_user_uuid,
-                    summary: "dispatching to #{hostnames.join(", ")}",
-                    properties: {'text' => log_text}).save!
-          end
-        rescue => e
-          $stderr.puts "dispatch: log.create failed: #{e}"
-        end
-        return hostnames
-      end
-    end
-    nil
-  end
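
    nodes_available_for_job_now builds one test per runtime constraint and
    requires an idle node to pass all of them. A simplified, standalone sketch
    of that matching, using plain hashes in place of Node and Job records (all
    values below are made up):

    # Map job constraint keys to node property keys, as in NODE_CONSTRAINT_MAP.
    CONSTRAINTS = {
      'min_ram_mb_per_node' => 'total_ram_mb',
      'min_cores_per_node'  => 'total_cpu_cores',
    }

    def satisfies_all_constraints?(node_props, job_constraints)
      CONSTRAINTS.all? do |job_key, node_key|
        node_props[node_key].to_i >= job_constraints[job_key].to_i
      end
    end

    nodes = [
      {'hostname' => 'compute1', 'total_ram_mb' => 16_000, 'total_cpu_cores' => 8},
      {'hostname' => 'compute2', 'total_ram_mb' => 4_000,  'total_cpu_cores' => 2},
    ]
    job = {'min_ram_mb_per_node' => 8_000, 'min_cores_per_node' => 4}
    p nodes.select { |n| satisfies_all_constraints?(n, job) }.map { |n| n['hostname'] }
    # => ["compute1"]
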
-
-  def nodes_available_for_job(job)
-    # Check if there are enough idle nodes with the Job's minimum
-    # hardware requirements to run it.  If so, return an array of
-    # their names.  If not, up to once per hour, signal start_jobs to
-    # hold off launching Jobs.  This delay is meant to give the Node
-    # Manager an opportunity to make new resources available for new
-    # Jobs.
-    #
-    # The exact timing parameters here might need to be adjusted for
-    # the best balance between helping the longest-waiting Jobs run,
-    # and making efficient use of immediately available resources.
-    # These are all just first efforts until we have more data to work
-    # with.
-    nodelist = nodes_available_for_job_now(job)
-    if nodelist.nil? and not did_recently(:wait_for_available_nodes, 3600)
-      $stderr.puts "dispatch: waiting for nodes for #{job.uuid}"
-      @node_wait_deadline = Time.now + 5.minutes
-    end
-    nodelist
-  end
-
-  def fail_job job, message, skip_lock: false
-    $stderr.puts "dispatch: #{job.uuid}: #{message}"
-    begin
-      Log.new(object_uuid: job.uuid,
-              event_type: 'dispatch',
-              owner_uuid: job.owner_uuid,
-              summary: message,
-              properties: {"text" => message}).save!
-    rescue => e
-      $stderr.puts "dispatch: log.create failed: #{e}"
-    end
-
-    if not skip_lock and not have_job_lock?(job)
-      begin
-        job.lock @authorizations[job.uuid].user.uuid
-      rescue ArvadosModel::AlreadyLockedError
-        $stderr.puts "dispatch: tried to mark job #{job.uuid} as failed but it was already locked by someone else"
-        return
-      end
-    end
-
-    job.state = "Failed"
-    if not job.save
-      $stderr.puts "dispatch: save failed setting job #{job.uuid} to failed"
-    end
-  end
-
-  def stdout_s(cmd_a, opts={})
-    IO.popen(cmd_a, "r", opts) do |pipe|
-      return pipe.read.chomp
-    end
-  end
-
-  def git_cmd(*cmd_a)
-    ["git", "--git-dir=#{@arvados_internal}"] + cmd_a
-  end
-
-  def get_authorization(job)
-    if @authorizations[job.uuid] and
-        @authorizations[job.uuid].user.uuid != job.modified_by_user_uuid
-      # We already made a token for this job, but we need a new one
-      # because modified_by_user_uuid has changed (the job will run
-      # as a different user).
-      @authorizations[job.uuid].update_attributes expires_at: Time.now
-      @authorizations[job.uuid] = nil
-    end
-    if not @authorizations[job.uuid]
-      auth = ApiClientAuthorization.
-        new(user: User.where('uuid=?', job.modified_by_user_uuid).first,
-            api_client_id: 0)
-      if not auth.save
-        $stderr.puts "dispatch: auth.save failed for #{job.uuid}"
-      else
-        @authorizations[job.uuid] = auth
-      end
-    end
-    @authorizations[job.uuid]
-  end
-
-  def internal_repo_has_commit? sha1
-    if (not @fetched_commits[sha1] and
-        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
-        $? == 0)
-      @fetched_commits[sha1] = true
-    end
-    return @fetched_commits[sha1]
-  end
-
-  def get_commit src_repo, sha1
-    return true if internal_repo_has_commit? sha1
-
-    # commit does not exist in internal repository, so import the
-    # source repository using git fetch-pack
-    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-    $stderr.puts "dispatch: #{cmd}"
-    $stderr.puts(stdout_s(cmd))
-    @fetched_commits[sha1] = ($? == 0)
-  end
-
-  def tag_commit(job, commit_hash, tag_name)
-    # @git_tags[T]==V if we know commit V has been tagged T in the
-    # arvados_internal repository.
-    if not @git_tags[tag_name]
-      cmd = git_cmd("tag", tag_name, commit_hash)
-      $stderr.puts "dispatch: #{cmd}"
-      $stderr.puts(stdout_s(cmd, err: "/dev/null"))
-      unless $? == 0
-        # git tag failed.  This may be because the tag already exists, so check for that.
-        tag_rev = stdout_s(git_cmd("rev-list", "-n1", tag_name))
-        if $? == 0
-          # We got a revision back
-          if tag_rev != commit_hash
-            # Uh oh, the tag doesn't point to the revision we were expecting.
-            # Someone has been monkeying with the job record and/or git.
-            fail_job job, "Existing tag #{tag_name} points to commit #{tag_rev} but expected commit #{commit_hash}"
-            return nil
-          end
-          # we're okay (fall through to setting @git_tags below)
-        else
-          # git rev-list failed for some reason.
-          fail_job job, "'git tag' for #{tag_name} failed but did not find any existing tag using 'git rev-list'"
-          return nil
-        end
-      end
-      # 'git tag' was successful, or there is an existing tag that points to the same revision.
-      @git_tags[tag_name] = commit_hash
-    elsif @git_tags[tag_name] != commit_hash
-      fail_job job, "Existing tag #{tag_name} points to commit #{@git_tags[tag_name]} but this job uses commit #{commit_hash}"
-      return nil
-    end
-    @git_tags[tag_name]
-  end
-
-  def start_jobs
-    @todo.each do |job|
-      next if @running[job.uuid]
-
-      cmd_args = nil
-      case Rails.configuration.Containers.JobsAPI.CrunchJobWrapper
-      when "none"
-        if @running.size > 0
-            # Don't run more than one at a time.
-            return
-        end
-        cmd_args = []
-      when "slurm_immediate"
-        nodelist = nodes_available_for_job(job)
-        if nodelist.nil?
-          if Time.now < @node_wait_deadline
-            break
-          else
-            next
-          end
-        end
-        cmd_args = ["salloc",
-                    "--chdir=/",
-                    "--immediate",
-                    "--exclusive",
-                    "--no-kill",
-                    "--job-name=#{job.uuid}",
-                    "--nodelist=#{nodelist.join(',')}"]
-      else
-        raise "Unknown crunch_job_wrapper: #{Rails.configuration.Containers.JobsAPI.CrunchJobWrapper}"
-      end
-
-      cmd_args = sudo_preface + cmd_args
-
-      next unless get_authorization job
-
-      ready = internal_repo_has_commit? job.script_version
-
-      if not ready
-        # Import the commit from the specified repository into the
-        # internal repository. This should have been done already when
-        # the job was created/updated; this code is obsolete except to
-        # avoid deployment races. Failing the job would be a
-        # reasonable thing to do at this point.
-        repo = Repository.where(name: job.repository).first
-        if repo.nil? or repo.server_path.nil?
-          fail_job job, "Repository #{job.repository} not found under #{@repo_root}"
-          next
-        end
-        ready &&= get_commit repo.server_path, job.script_version
-        ready &&= tag_commit job, job.script_version, job.uuid
-      end
-
-      # This should be unnecessary, because API server does it during
-      # job create/update, but it's still not a bad idea to verify the
-      # tag is correct before starting the job:
-      ready &&= tag_commit job, job.script_version, job.uuid
-
-      # The arvados_sdk_version doesn't support use of arbitrary
-      # remote URLs, so the requested version isn't necessarily copied
-      # into the internal repository yet.
-      if job.arvados_sdk_version
-        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
-        ready &&= tag_commit job, job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
-      end
-
-      if not ready
-        fail_job job, "commit not present in internal repository"
-        next
-      end
-
-      cmd_args += [@crunch_job_bin,
-                   '--job-api-token', @authorizations[job.uuid].api_token,
-                   '--job', job.uuid,
-                   '--git-dir', @arvados_internal]
-
-      if @cgroup_root
-        cmd_args += ['--cgroup-root', @cgroup_root]
-      end
-
-      if @docker_bin
-        cmd_args += ['--docker-bin', @docker_bin]
-      end
-
-      if @docker_run_args
-        cmd_args += ['--docker-run-args', @docker_run_args]
-      end
-
-      if @srun_sync_timeout
-        cmd_args += ['--srun-sync-timeout', @srun_sync_timeout]
-      end
-
-      if have_job_lock?(job)
-        cmd_args << "--force-unlock"
-      end
-
-      $stderr.puts "dispatch: #{cmd_args.join ' '}"
-
-      begin
-        i, o, e, t = Open3.popen3(*cmd_args)
-      rescue
-        $stderr.puts "dispatch: popen3: #{$!}"
-        # This is a dispatch problem like "Too many open files";
-        # retrying another job right away would be futile. Just return
-        # and hope things are better next time, after (at least) a
-        # did_recently() delay.
-        return
-      end
-
-      $stderr.puts "dispatch: job #{job.uuid}"
-      start_banner = "dispatch: child #{t.pid} start #{LogTime.now}"
-      $stderr.puts start_banner
-
-      @running[job.uuid] = {
-        stdin: i,
-        stdout: o,
-        stderr: e,
-        wait_thr: t,
-        job: job,
-        buf: {stderr: '', stdout: ''},
-        started: false,
-        sent_int: 0,
-        job_auth: @authorizations[job.uuid],
-        stderr_buf_to_flush: '',
-        stderr_flushed_at: Time.new(0),
-        bytes_logged: 0,
-        events_logged: 0,
-        log_throttle_is_open: true,
-        log_throttle_reset_time: Time.now + Rails.configuration.Containers.Logging.LogThrottlePeriod,
-        log_throttle_bytes_so_far: 0,
-        log_throttle_lines_so_far: 0,
-        log_throttle_bytes_skipped: 0,
-        log_throttle_partial_line_last_at: Time.new(0),
-        log_throttle_first_partial_line: true,
-      }
-      i.close
-      @todo_job_retries.delete(job.uuid)
-      update_node_status
-    end
-  end
-
-  # Test for hard cap on total output and for log throttling.  Returns whether
-  # the log line should go to output or not.  Modifies "line" in place to
-  # replace it with an error if a logging limit is tripped.
-  def rate_limit running_job, line
-    message = false
-    linesize = line.size
-    if running_job[:log_throttle_is_open]
-      partial_line = false
-      skip_counts = false
-      matches = line.match(/^\S+ \S+ \d+ \d+ stderr (.*)/)
-      if matches and matches[1] and matches[1].start_with?('[...]') and matches[1].end_with?('[...]')
-        partial_line = true
-        if Time.now > running_job[:log_throttle_partial_line_last_at] + Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod
-          running_job[:log_throttle_partial_line_last_at] = Time.now
-        else
-          skip_counts = true
-        end
-      end
-
-      if !skip_counts
-        running_job[:log_throttle_lines_so_far] += 1
-        running_job[:log_throttle_bytes_so_far] += linesize
-        running_job[:bytes_logged] += linesize
-      end
-
-      if (running_job[:bytes_logged] >
-          Rails.configuration.Containers.Logging.LimitLogBytesPerJob)
-        message = "Exceeded log limit #{Rails.configuration.Containers.Logging.LimitLogBytesPerJob} bytes (LimitLogBytesPerJob). Log will be truncated."
-        running_job[:log_throttle_reset_time] = Time.now + 100.years
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_bytes_so_far] >
-             Rails.configuration.Containers.Logging.LogThrottleBytes)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.Containers.Logging.LogThrottleBytes} bytes per #{Rails.configuration.Containers.Logging.LogThrottlePeriod} seconds (LogThrottleBytes). Logging will be silenced for the next #{remaining_time.round} seconds."
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_lines_so_far] >
-             Rails.configuration.Containers.Logging.LogThrottleLines)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.Containers.Logging.LogThrottleLines} lines per #{Rails.configuration.Containers.Logging.LogThrottlePeriod} seconds (LogThrottleLines), logging will be silenced for the next #{remaining_time.round} seconds."
-        running_job[:log_throttle_is_open] = false
-
-      elsif partial_line and running_job[:log_throttle_first_partial_line]
-        running_job[:log_throttle_first_partial_line] = false
-        message = "Rate-limiting partial segments of long lines to one every #{Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod} seconds."
-      end
-    end
-
-    if not running_job[:log_throttle_is_open]
-      # Don't log anything if any limit has been exceeded. Just count lossage.
-      running_job[:log_throttle_bytes_skipped] += linesize
-    end
-
-    if message
-      # Yes, write to logs, but use our "rate exceeded" message
-      # instead of the log message that exceeded the limit.
-      message += " A complete log is still being written to Keep, and will be available when the job finishes.\n"
-      line.replace message
-      true
-    elsif partial_line
-      false
-    else
-      running_job[:log_throttle_is_open]
-    end
-  end
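
    rate_limit tracks several budgets at once: total bytes per job, bytes and
    lines per throttle period, and partial-line frequency. A stripped-down
    sketch of just the bytes-per-period budget, with arbitrary example limits,
    to show the open/closed pattern:

    class ByteBudgetThrottle
      def initialize(period_s, bytes_per_period)
        @period = period_s
        @limit = bytes_per_period
        reset(Time.now)
      end

      # Returns true if the line should be logged, false if it must be dropped.
      def permit?(line, now = Time.now)
        reset(now) if now > @reset_at
        @bytes += line.bytesize
        @bytes <= @limit
      end

      private

      def reset(now)
        @reset_at = now + @period
        @bytes = 0
      end
    end

    throttle = ByteBudgetThrottle.new(60, 20)
    ["short line", "a considerably longer log line", "more"].each do |l|
      puts "#{throttle.permit?(l) ? 'keep' : 'drop'}: #{l}"
    end
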
-
-  def read_pipes
-    @running.each do |job_uuid, j|
-      now = Time.now
-      if now > j[:log_throttle_reset_time]
-        # It has been more than LogThrottlePeriod seconds since the last
-        # checkpoint, so reset the throttle.
-        if j[:log_throttle_bytes_skipped] > 0
-          message = "#{job_uuid} ! Skipped #{j[:log_throttle_bytes_skipped]} bytes of log"
-          $stderr.puts message
-          j[:stderr_buf_to_flush] << "#{LogTime.now} #{message}\n"
-        end
-
-        j[:log_throttle_reset_time] = now + Rails.configuration.Containers.Logging.LogThrottlePeriod
-        j[:log_throttle_bytes_so_far] = 0
-        j[:log_throttle_lines_so_far] = 0
-        j[:log_throttle_bytes_skipped] = 0
-        j[:log_throttle_is_open] = true
-        j[:log_throttle_partial_line_last_at] = Time.new(0)
-        j[:log_throttle_first_partial_line] = true
-      end
-
-      j[:buf].each do |stream, streambuf|
-        # Read some data from the child stream
-        buf = ''
-        begin
-          # It's important to use a big enough buffer here. When we're
-          # being flooded with logs, we must read and discard many
-          # bytes at once. Otherwise, we can easily peg a CPU with
-          # time-checking and other loop overhead. (Quick tests show a
-          # 1MiB buffer working 2.5x as fast as a 64 KiB buffer.)
-          #
-          # So don't reduce this buffer size!
-          buf = j[stream].read_nonblock(2**20)
-        rescue Errno::EAGAIN, EOFError
-        end
-
-        # Short circuit the counting code if we're just going to throw
-        # away the data anyway.
-        if not j[:log_throttle_is_open]
-          j[:log_throttle_bytes_skipped] += streambuf.size + buf.size
-          streambuf.replace ''
-          next
-        elsif buf == ''
-          next
-        end
-
-        # Append to incomplete line from previous read, if any
-        streambuf << buf
-
-        bufend = ''
-        streambuf.each_line do |line|
-          if not line.end_with? $/
-            if line.size > Rails.configuration.Containers.Logging.LogThrottleBytes
-              # Without a limit here, we'll use 2x an arbitrary amount
-              # of memory, and waste a lot of time copying strings
-              # around, all without providing any feedback to anyone
-              # about what's going on _or_ hitting any of our throttle
-              # limits.
-              #
-              # Here we leave "line" alone, knowing it will never be
-              # sent anywhere: rate_limit() will hit the
-              # LogThrottleBytes limit immediately. However, we'll
-              # leave [...] in bufend: if the trailing end of the long
-              # line does end up getting sent anywhere, it will have
-              # some indication that it is incomplete.
-              bufend = "[...]"
-            else
-              # If line length is sane, we'll wait for the rest of the
-              # line to appear in the next read_pipes() call.
-              bufend = line
-              break
-            end
-          end
-          # rate_limit returns true or false as to whether to actually log
-          # the line or not.  It also modifies "line" in place to replace
-          # it with an error if a logging limit is tripped.
-          if rate_limit j, line
-            $stderr.print "#{job_uuid} ! " unless line.index(job_uuid)
-            $stderr.puts line
-            pub_msg = "#{LogTime.now} #{line.strip}\n"
-            j[:stderr_buf_to_flush] << pub_msg
-          end
-        end
-
-        # Leave the trailing incomplete line (if any) in streambuf for
-        # next time.
-        streambuf.replace bufend
-      end
-      # Flush buffered logs to the logs table, if appropriate. We have
-      # to do this even if we didn't collect any new logs this time:
-      # otherwise, buffered data older than LogSecondsBetweenEvents
-      # won't get flushed until new data arrives.
-      write_log j
-    end
-  end
-
-  def reap_children
-    return if 0 == @running.size
-    pid_done = nil
-    j_done = nil
-
-    @running.each do |uuid, j|
-      if !j[:wait_thr].status
-        pid_done = j[:wait_thr].pid
-        j_done = j
-        break
-      end
-    end
-
-    return if !pid_done
-
-    job_done = j_done[:job]
-
-    # Ensure every last drop of stdout and stderr is consumed.
-    read_pipes
-    # Reset flush timestamp to make sure log gets written.
-    j_done[:stderr_flushed_at] = Time.new(0)
-    # Write any remaining logs.
-    write_log j_done
-
-    j_done[:buf].each do |stream, streambuf|
-      if streambuf != ''
-        $stderr.puts streambuf + "\n"
-      end
-    end
-
-    # Wait for the thread to finish (its value is a Process::Status).
-    exit_status = j_done[:wait_thr].value.exitstatus
-    exit_tempfail = exit_status == EXIT_TEMPFAIL
-
-    $stderr.puts "dispatch: child #{pid_done} exit #{exit_status}"
-    $stderr.puts "dispatch: job #{job_done.uuid} end"
-
-    jobrecord = Job.find_by_uuid(job_done.uuid)
-
-    if exit_status == EXIT_RETRY_UNLOCKED or (exit_tempfail and @job_retry_counts.include? jobrecord.uuid)
-      $stderr.puts("dispatch: job #{jobrecord.uuid} was interrupted by node failure")
-      # Only this crunch-dispatch process can retry the job:
-      # it's already locked, and there's no way to put it back in the
-      # Queued state.  Put it in our internal todo list unless the job
-      # has failed this way excessively.
-      @job_retry_counts[jobrecord.uuid] += 1
-      exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
-      do_what_next = "give up now"
-      if exit_tempfail
-        @todo_job_retries[jobrecord.uuid] = jobrecord
-        do_what_next = "re-attempt"
-      end
-      $stderr.puts("dispatch: job #{jobrecord.uuid} has been interrupted " +
-                   "#{@job_retry_counts[jobrecord.uuid]}x, will #{do_what_next}")
-    end
-
-    if !exit_tempfail
-      @job_retry_counts.delete(jobrecord.uuid)
-      if jobrecord.state == "Running"
-        # Apparently there was an unhandled error.  That could potentially
-        # include "all allocated nodes failed" when we don't want to retry
-        # because the job has already been retried RETRY_UNLOCKED_LIMIT
-        # times.  Fail the job.
-        jobrecord.state = "Failed"
-        if not jobrecord.save
-          $stderr.puts "dispatch: jobrecord.save failed"
-        end
-      end
-    else
-      # If the job failed to run due to an infrastructure
-      # issue with crunch-job or slurm, we want the job to stay in the
-      # queue. If crunch-job exited after losing a race to another
-      # crunch-job process, it exits 75 and we should leave the job
-      # record alone so the winner of the race can do its thing.
-      # If crunch-job exited after all of its allocated nodes failed,
-      # it exits 93, and we want to retry it later (see the
-      # EXIT_RETRY_UNLOCKED `if` block).
-      #
-      # There is still an unhandled race condition: If our crunch-job
-      # process is about to lose a race with another crunch-job
-      # process, but crashes before getting to its "exit 75" (for
-      # example, "cannot fork" or "cannot reach API server") then we
-      # will assume incorrectly that it's our process's fault (because
-      # jobrecord.started_at is non-nil), and mark the job as failed
-      # even though the winner of the race is probably still doing
-      # fine.
-    end
-
-    # Invalidate the per-job auth token, unless the job is still queued and we
-    # might want to try it again.
-    if jobrecord.state != "Queued" and !@todo_job_retries.include?(jobrecord.uuid)
-      j_done[:job_auth].update_attributes expires_at: Time.now
-    end
-
-    @running.delete job_done.uuid
-  end
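
    reap_children keys its behavior off the child's exit status; per the
    comments above, 75 (EXIT_TEMPFAIL) means leave the job queued, and 93
    (EXIT_RETRY_UNLOCKED) means retry from this dispatcher's internal todo
    list, up to a limit. A condensed sketch of that branching (the retry limit
    here is an arbitrary example value, and the real code also treats repeated
    tempfails of an already-locked job as retry-unlocked):

    EXIT_TEMPFAIL = 75        # crunch-job lost a race or hit an infrastructure error
    EXIT_RETRY_UNLOCKED = 93  # all allocated nodes failed; only we hold the lock
    RETRY_LIMIT = 3           # illustrative; the class uses RETRY_UNLOCKED_LIMIT

    def next_action(exit_status, retries_so_far)
      if exit_status == EXIT_RETRY_UNLOCKED && retries_so_far < RETRY_LIMIT
        :retry_from_internal_todo_list
      elsif exit_status == EXIT_TEMPFAIL
        :leave_job_queued
      elsif exit_status == 0
        :done
      else
        :mark_job_failed
      end
    end

    p next_action(93, 0)   # => :retry_from_internal_todo_list
    p next_action(93, 3)   # => :mark_job_failed
    p next_action(75, 0)   # => :leave_job_queued
    p next_action(1, 0)    # => :mark_job_failed
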
-
-  def update_pipelines
-    expire_tokens = @pipe_auth_tokens.dup
-    @todo_pipelines.each do |p|
-      pipe_auth = (@pipe_auth_tokens[p.uuid] ||= ApiClientAuthorization.
-                   create(user: User.where('uuid=?', p.modified_by_user_uuid).first,
-                          api_client_id: 0))
-      puts `export ARVADOS_API_TOKEN=#{pipe_auth.api_token} && arv-run-pipeline-instance --run-pipeline-here --no-wait --instance #{p.uuid}`
-      expire_tokens.delete p.uuid
-    end
-
-    expire_tokens.each do |k, v|
-      v.update_attributes expires_at: Time.now
-      @pipe_auth_tokens.delete k
-    end
-  end
-
-  def parse_argv argv
-    @runoptions = {}
-    (argv.any? ? argv : ['--jobs', '--pipelines']).each do |arg|
-      case arg
-      when '--jobs'
-        @runoptions[:jobs] = true
-      when '--pipelines'
-        @runoptions[:pipelines] = true
-      else
-        abort "Unrecognized command line option '#{arg}'"
-      end
-    end
-    if not (@runoptions[:jobs] or @runoptions[:pipelines])
-      abort "Nothing to do. Please specify at least one of: --jobs, --pipelines."
-    end
-  end
-
-  def run argv
-    parse_argv argv
-
-    # We want files written by crunch-dispatch to be writable by other
-    # processes with the same GID, see bug #7228
-    File.umask(0002)
-
-    # This is how crunch-job child procs know where the "refresh"
-    # trigger file is
-    ENV["CRUNCH_REFRESH_TRIGGER"] = Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger
-
-    # If salloc can't allocate resources immediately, make it use our
-    # temporary failure exit code.  This ensures crunch-dispatch won't
-    # mark a job failed because of an issue with node allocation.
-    # This often happens when another dispatcher wins the race to
-    # allocate nodes.
-    ENV["SLURM_EXIT_IMMEDIATE"] = CrunchDispatch::EXIT_TEMPFAIL.to_s
-
-    if ENV["CRUNCH_DISPATCH_LOCKFILE"]
-      lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
-      lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
-      unless lockfile.flock File::LOCK_EX|File::LOCK_NB
-        abort "Lock unavailable on #{lockfilename} - exit"
-      end
-    end
-
-    @signal = {}
-    %w{TERM INT}.each do |sig|
-      signame = sig
-      Signal.trap(sig) do
-        $stderr.puts "Received #{signame} signal"
-        @signal[:term] = true
-      end
-    end
-
-    act_as_system_user
-    User.first.group_permissions
-    $stderr.puts "dispatch: ready"
-    while !@signal[:term] or @running.size > 0
-      read_pipes
-      if @signal[:term]
-        @running.each do |uuid, j|
-          if !j[:started] and j[:sent_int] < 2
-            begin
-              Process.kill 'INT', j[:wait_thr].pid
-            rescue Errno::ESRCH
-              # No such pid = race condition + desired result is
-              # already achieved
-            end
-            j[:sent_int] += 1
-          end
-        end
-      else
-        refresh_todo unless did_recently(:refresh_todo, 1.0)
-        update_node_status unless did_recently(:update_node_status, 1.0)
-        unless @todo.empty? or did_recently(:start_jobs, 1.0) or @signal[:term]
-          start_jobs
-        end
-        unless (@todo_pipelines.empty? and @pipe_auth_tokens.empty?) or did_recently(:update_pipelines, 5.0)
-          update_pipelines
-        end
-        unless did_recently('check_orphaned_slurm_jobs', 60)
-          check_orphaned_slurm_jobs
-        end
-      end
-      reap_children
-      select(@running.values.collect { |j| [j[:stdout], j[:stderr]] }.flatten,
-             [], [], 1)
-    end
-    # If there are jobs we wanted to retry, we have to mark them as failed now.
-    # Other dispatchers can't pick them up because we hold their lock.
-    @todo_job_retries.each_key do |job_uuid|
-      job = Job.find_by_uuid(job_uuid)
-      if job.state == "Running"
-        fail_job(job, "crunch-dispatch was stopped during job's tempfail retry loop")
-      end
-    end
-  end
-
-  def fail_jobs before: nil
-    act_as_system_user do
-      threshold = nil
-      if before == 'reboot'
-        boottime = nil
-        open('/proc/stat').map(&:split).each do |stat, t|
-          if stat == 'btime'
-            boottime = t
-          end
-        end
-        if not boottime
-          raise "Could not find btime in /proc/stat"
-        end
-        threshold = Time.at(boottime.to_i)
-      elsif before
-        threshold = Time.parse(before, Time.now)
-      else
-        threshold = db_current_time
-      end
-      Rails.logger.info "fail_jobs: threshold is #{threshold}"
-
-      squeue = squeue_jobs
-      Job.where('state = ? and started_at < ?', Job::Running, threshold).
-        each do |job|
-        Rails.logger.debug "fail_jobs: #{job.uuid} started #{job.started_at}"
-        squeue.each do |slurm_name|
-          if slurm_name == job.uuid
-            Rails.logger.info "fail_jobs: scancel #{job.uuid}"
-            scancel slurm_name
-          end
-        end
-        fail_job(job, "cleaned up stale job: started before #{threshold}",
-                 skip_lock: true)
-      end
-    end
-  end
-
-  def check_orphaned_slurm_jobs
-    act_as_system_user do
-      squeue_uuids = squeue_jobs.select{|uuid| uuid.match(/^[0-9a-z]{5}-8i9sb-[0-9a-z]{15}$/)}.
-                                  select{|uuid| !@running.has_key?(uuid)}
-
-      return if squeue_uuids.size == 0
-
-      scancel_uuids = squeue_uuids - Job.where('uuid in (?) and (state in (?) or modified_at>?)',
-                                               squeue_uuids,
-                                               ['Running', 'Queued'],
-                                               (Time.now - 60)).
-                                         collect(&:uuid)
-      scancel_uuids.each do |uuid|
-        Rails.logger.info "orphaned job: scancel #{uuid}"
-        scancel uuid
-      end
-    end
-  end
-
-  def sudo_preface
-    return [] if not Rails.configuration.Containers.JobsAPI.CrunchJobUser
-    ["sudo", "-E", "-u",
-     Rails.configuration.Containers.JobsAPI.CrunchJobUser,
-     "LD_LIBRARY_PATH=#{ENV['LD_LIBRARY_PATH']}",
-     "PATH=#{ENV['PATH']}",
-     "PERLLIB=#{ENV['PERLLIB']}",
-     "PYTHONPATH=#{ENV['PYTHONPATH']}",
-     "RUBYLIB=#{ENV['RUBYLIB']}",
-     "GEM_PATH=#{ENV['GEM_PATH']}"]
-  end
-
-  protected
-
-  def have_job_lock?(job)
-    # Return true if the given job is locked by this crunch-dispatch, normally
-    # because we've run crunch-job for it.
-    @todo_job_retries.include?(job.uuid)
-  end
-
-  def did_recently(thing, min_interval)
-    if !@did_recently[thing] or @did_recently[thing] < Time.now - min_interval
-      @did_recently[thing] = Time.now
-      false
-    else
-      true
-    end
-  end
-
-  # Send a message to the log table. We want these records to be transient.
-  def write_log running_job
-    return if running_job[:stderr_buf_to_flush] == ''
-
-    # Send a log event if the buffer size exceeds LogBytesPerEvent, or if it
-    # has been at least LogSecondsBetweenEvents seconds since the last flush.
-    if running_job[:stderr_buf_to_flush].size > Rails.configuration.Containers.Logging.LogBytesPerEvent or
-        (Time.now - running_job[:stderr_flushed_at]) >= Rails.configuration.Containers.Logging.LogSecondsBetweenEvents
-      begin
-        log = Log.new(object_uuid: running_job[:job].uuid,
-                      event_type: 'stderr',
-                      owner_uuid: running_job[:job].owner_uuid,
-                      properties: {"text" => running_job[:stderr_buf_to_flush]})
-        log.save!
-        running_job[:events_logged] += 1
-      rescue => exception
-        $stderr.puts "Failed to write logs"
-        $stderr.puts exception.backtrace
-      end
-      running_job[:stderr_buf_to_flush] = ''
-      running_job[:stderr_flushed_at] = Time.now
-    end
-  end
-
-  # An array of job_uuids in squeue
-  def squeue_jobs
-    if Rails.configuration.Containers.JobsAPI.CrunchJobWrapper == "slurm_immediate"
-      p = IO.popen(['squeue', '-a', '-h', '-o', '%j'])
-      begin
-        p.readlines.map {|line| line.strip}
-      ensure
-        p.close
-      end
-    else
-      []
-    end
-  end
-
-  def scancel slurm_name
-    cmd = sudo_preface + ['scancel', '-n', slurm_name]
-    IO.popen(cmd) do |scancel_pipe|
-      puts scancel_pipe.read
-    end
-    if not $?.success?
-      Rails.logger.error "scancel #{slurm_name.shellescape}: #{$?}"
-    end
-  end
-end
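
    The dispatch loop above combines IO.select over every child's stdout and
    stderr with large non-blocking reads in read_pipes. A minimal sketch of
    that pattern for a single child process (the shell command is only an
    example):

    require 'open3'

    stdin, stdout, stderr, wait_thr = Open3.popen3('sh', '-c', 'echo out; echo err >&2')
    stdin.close
    pipes = [stdout, stderr]

    until pipes.empty?
      ready, = IO.select(pipes, [], [], 1)
      next unless ready
      ready.each do |io|
        begin
          print io.read_nonblock(2**16)   # use a big buffer, as read_pipes does
        rescue IO::WaitReadable
          # not actually readable yet; go around again
        rescue EOFError
          io.close
          pipes.delete(io)
        end
      end
    end
    wait_thr.value  # reap the child and get its Process::Status
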
index a4fdc5a1e785119d943a0db21d5f69f11a0ae701..1a96a81ad66708f8e45c032737a821d3a5d12ebc 100644 (file)
@@ -2,6 +2,15 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+Disable_update_jobs_api_method_list = {"jobs.create"=>{},
+                                "pipeline_instances.create"=>{},
+                                "pipeline_templates.create"=>{},
+                                "jobs.update"=>{},
+                                "pipeline_instances.update"=>{},
+                                "pipeline_templates.update"=>{},
+                                "job_tasks.create"=>{},
+                                "job_tasks.update"=>{}}
+
 Disable_jobs_api_method_list = {"jobs.create"=>{},
                                 "pipeline_instances.create"=>{},
                                 "pipeline_templates.create"=>{},
@@ -30,6 +39,9 @@ Disable_jobs_api_method_list = {"jobs.create"=>{},
                                 "job_tasks.show"=>{}}
 
 def check_enable_legacy_jobs_api
+  # Create/update is permanently disabled (legacy functionality has been removed)
+  Rails.configuration.API.DisabledAPIs.merge! Disable_update_jobs_api_method_list
+
   if Rails.configuration.Containers.JobsAPI.Enable == "false" ||
      (Rails.configuration.Containers.JobsAPI.Enable == "auto" &&
       Job.count == 0)
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
deleted file mode 100755 (executable)
index 38bd54b..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-dispatch_argv = []
-ARGV.reject! do |arg|
-  dispatch_argv.push(arg) if /^--/ =~ arg
-end
-
-ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-require './lib/crunch_dispatch.rb'
-
-CrunchDispatch.new.run dispatch_argv
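
    The deleted wrapper script separates "--" style options (passed to
    CrunchDispatch#run) from the positional Rails environment argument in a
    single ARGV.reject! pass. The same idiom in isolation, with example input:

    ARGV.replace(['production', '--jobs', '--pipelines'])  # example input

    dispatch_argv = []
    ARGV.reject! do |arg|
      # push returns the (truthy) array, so matching args are also removed from ARGV
      dispatch_argv.push(arg) if /^--/ =~ arg
    end

    p ARGV           # => ["production"]
    p dispatch_argv  # => ["--jobs", "--pipelines"]
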
diff --git a/services/api/script/crunch_failure_report.py b/services/api/script/crunch_failure_report.py
deleted file mode 100755 (executable)
index 83217d8..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-#! /usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-import argparse
-import datetime
-import json
-import re
-import sys
-
-import arvados
-
-# Useful configuration variables:
-
-# Number of log lines to use as context in diagnosing failure.
-LOG_CONTEXT_LINES = 10
-
-# Regex that signifies a failed task.
-FAILED_TASK_REGEX = re.compile(' \d+ failure (.*permanent)')
-
-# Regular expressions used to classify failure types.
-JOB_FAILURE_TYPES = {
-    'sys/docker': 'Cannot destroy container',
-    'crunch/node': 'User not found on host',
-    'slurm/comm':  'Communication connection failure'
-}
-
-def parse_arguments(arguments):
-    arg_parser = argparse.ArgumentParser(
-        description='Produce a report of Crunch failures within a specified time range')
-
-    arg_parser.add_argument(
-        '--start',
-        help='Start date and time')
-    arg_parser.add_argument(
-        '--end',
-        help='End date and time')
-
-    args = arg_parser.parse_args(arguments)
-
-    if args.start and not is_valid_timestamp(args.start):
-        raise ValueError(args.start)
-    if args.end and not is_valid_timestamp(args.end):
-        raise ValueError(args.end)
-
-    return args
-
-
-def api_timestamp(when=None):
-    """Returns a string representing the timestamp 'when' in a format
-    suitable for delivering to the API server.  Defaults to the
-    current time.
-    """
-    if when is None:
-        when = datetime.datetime.utcnow()
-    return when.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-
-def is_valid_timestamp(ts):
-    return re.match(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z', ts)
-
-
-def jobs_created_between_dates(api, start, end):
-    return arvados.util.list_all(
-        api.jobs().list,
-        filters=json.dumps([ ['created_at', '>=', start],
-                             ['created_at', '<=', end] ]))
-
-
-def job_logs(api, job):
-    # Returns the contents of the log for this job (as an array of lines).
-    if job['log']:
-        log_collection = arvados.CollectionReader(job['log'], api)
-        log_filename = "{}.log.txt".format(job['uuid'])
-        return log_collection.open(log_filename).readlines()
-    return []
-
-
-user_names = {}
-def job_user_name(api, user_uuid):
-    def _lookup_user_name(api, user_uuid):
-        try:
-            return api.users().get(uuid=user_uuid).execute()['full_name']
-        except arvados.errors.ApiError:
-            return user_uuid
-
-    if user_uuid not in user_names:
-        user_names[user_uuid] = _lookup_user_name(api, user_uuid)
-    return user_names[user_uuid]
-
-
-job_pipeline_names = {}
-def job_pipeline_name(api, job_uuid):
-    def _lookup_pipeline_name(api, job_uuid):
-        try:
-            pipelines = api.pipeline_instances().list(
-                filters='[["components", "like", "%{}%"]]'.format(job_uuid)).execute()
-            pi = pipelines['items'][0]
-            if pi['name']:
-                return pi['name']
-            else:
-                # Use the pipeline template name
-                pt = api.pipeline_templates().get(uuid=pi['pipeline_template_uuid']).execute()
-                return pt['name']
-        except (TypeError, ValueError, IndexError):
-            return ""
-
-    if job_uuid not in job_pipeline_names:
-        job_pipeline_names[job_uuid] = _lookup_pipeline_name(api, job_uuid)
-    return job_pipeline_names[job_uuid]
-
-
-def is_failed_task(logline):
-    return FAILED_TASK_REGEX.search(logline) != None
-
-
-def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
-    args = parse_arguments(arguments)
-
-    api = arvados.api('v1')
-
-    now = datetime.datetime.utcnow()
-    start_time = args.start or api_timestamp(now - datetime.timedelta(days=1))
-    end_time = args.end or api_timestamp(now)
-
-    # Find all jobs created within the specified window,
-    # and their corresponding job logs.
-    jobs_created = jobs_created_between_dates(api, start_time, end_time)
-    jobs_by_state = {}
-    for job in jobs_created:
-        jobs_by_state.setdefault(job['state'], [])
-        jobs_by_state[job['state']].append(job)
-
-    # Find failed jobs and record the job failure text.
-
-    # failure_stats maps failure types (e.g. "sys/docker") to
-    # a set of job UUIDs that failed for that reason.
-    failure_stats = {}
-    for job in jobs_by_state['Failed']:
-        job_uuid = job['uuid']
-        logs = job_logs(api, job)
-        # Find the first permanent task failure, and collect the
-        # preceding log lines.
-        failure_type = None
-        for i, lg in enumerate(logs):
-            if is_failed_task(lg):
-                # Get preceding log record to provide context.
-                log_start = i - LOG_CONTEXT_LINES if i >= LOG_CONTEXT_LINES else 0
-                log_end = i + 1
-                lastlogs = ''.join(logs[log_start:log_end])
-                # try to identify the type of failure.
-                for key, rgx in JOB_FAILURE_TYPES.iteritems():
-                    if re.search(rgx, lastlogs):
-                        failure_type = key
-                        break
-            if failure_type is not None:
-                break
-        if failure_type is None:
-            failure_type = 'unknown'
-        failure_stats.setdefault(failure_type, set())
-        failure_stats[failure_type].add(job_uuid)
-
-    # Report percentages of successful, failed and unfinished jobs.
-    print "Start: {:20s}".format(start_time)
-    print "End:   {:20s}".format(end_time)
-    print ""
-
-    print "Overview"
-    print ""
-
-    job_start_count = len(jobs_created)
-    print "  {: <25s} {:4d}".format('Started', job_start_count)
-    for state in ['Complete', 'Failed', 'Queued', 'Cancelled', 'Running']:
-        if state in jobs_by_state:
-            job_count = len(jobs_by_state[state])
-            job_percentage = job_count / float(job_start_count)
-            print "  {: <25s} {:4d} ({: >4.0%})".format(state,
-                                                        job_count,
-                                                        job_percentage)
-    print ""
-
-    # Report failure types.
-    failure_summary = ""
-    failure_detail = ""
-
-    # Generate a mapping from failed job uuids to job records, to assist
-    # in generating detailed statistics for job failures.
-    jobs_failed_map = { job['uuid']: job for job in jobs_by_state.get('Failed', []) }
-
-    # sort the failure stats in descending order by occurrence.
-    sorted_failures = sorted(failure_stats,
-                             reverse=True,
-                             key=lambda failure_type: len(failure_stats[failure_type]))
-    for failtype in sorted_failures:
-        job_uuids = failure_stats[failtype]
-        failstat = "  {: <25s} {:4d} ({: >4.0%})\n".format(
-            failtype,
-            len(job_uuids),
-            len(job_uuids) / float(len(jobs_by_state['Failed'])))
-        failure_summary = failure_summary + failstat
-        failure_detail = failure_detail + failstat
-        for j in job_uuids:
-            job_info = jobs_failed_map[j]
-            job_owner = job_user_name(api, job_info['modified_by_user_uuid'])
-            job_name = job_pipeline_name(api, job_info['uuid'])
-            failure_detail = failure_detail + "    {}  {: <15.15s}  {:29.29s}\n".format(j, job_owner, job_name)
-        failure_detail = failure_detail + "\n"
-
-    print "Failures by class"
-    print ""
-    print failure_summary
-
-    print "Failures by class (detail)"
-    print ""
-    print failure_detail
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
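
    The deleted report classifies each failed job by scanning a window of log
    lines against JOB_FAILURE_TYPES and taking the first pattern that matches,
    falling back to "unknown". A compact sketch of that classification, written
    in Ruby like the other sketches here (patterns and log text are
    illustrative):

    FAILURE_TYPES = {
      'sys/docker'  => /Cannot destroy container/,
      'crunch/node' => /User not found on host/,
      'slurm/comm'  => /Communication connection failure/,
    }

    def classify_failure(log_lines)
      window = log_lines.join("\n")
      FAILURE_TYPES.each { |type, pattern| return type if window =~ pattern }
      'unknown'
    end

    puts classify_failure(['srun: error: Communication connection failure'])  # slurm/comm
    puts classify_failure(['something else entirely'])                        # unknown
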
diff --git a/services/api/script/fail-jobs.rb b/services/api/script/fail-jobs.rb
deleted file mode 100755 (executable)
index e52bfc0..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'optimist'
-
-opts = Optimist::options do
-  banner 'Fail jobs that have state=="Running".'
-  banner 'Options:'
-  opt(:before,
-      'fail only jobs that started before the given time (or "reboot")',
-      type: :string)
-end
-
-ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-require Rails.root.join('lib/crunch_dispatch.rb')
-
-CrunchDispatch.new.fail_jobs before: opts[:before]
index d10ab6a71b41b330f2d653ee0372d1b88679cf7a..02c5c6892ce8e01abbbd8278024e8bec42af613f 100644 (file)
@@ -8,370 +8,11 @@ require 'helpers/git_test_helper'
 class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
   fixtures :repositories, :users, :jobs, :links, :collections
 
-  # See git_setup.rb for the commit log for test.git.tar
-  include GitTestHelper
-
   setup do
     @controller = Arvados::V1::JobsController.new
     authorize_with :active
   end
 
-  test "reuse job with no_reuse=false" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          an_integer: '1',
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "reuse job with find_or_create=true" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "no reuse job with null log" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '3'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqq3', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "reuse job with symbolic script_version" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "tag1",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "do not reuse job because no_reuse=true" do
-    post :create, params: {
-      job: {
-        no_reuse: true,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  [false, "false"].each do |whichfalse|
-    test "do not reuse job because find_or_create=#{whichfalse.inspect}" do
-      post :create, params: {
-        job: {
-          script: "hash",
-          script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-          repository: "active/foo",
-          script_parameters: {
-            input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-            an_integer: '1'
-          }
-        },
-        find_or_create: whichfalse
-      }
-      assert_response :success
-      assert_not_nil assigns(:object)
-      new_job = JSON.parse(@response.body)
-      assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-      assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-    end
-  end
-
-  test "do not reuse job because output is not readable by user" do
-    authorize_with :job_reader
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_no_output" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykppp', new_job['uuid']
-  end
-
-  test "test_reuse_job_range" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        minimum_script_version: "tag1",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "cannot_reuse_job_no_minimum_given_so_must_use_specified_commit" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_different_input" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_different_version" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "test_can_reuse_job_submitted_nondeterministic" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        },
-        nondeterministic: true
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_past_nondeterministic" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash2",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykyyy', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_no_permission" do
-    authorize_with :spectator
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_excluded" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        minimum_script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-        script_version: "master",
-        repository: "active/foo",
-        exclude_script_versions: ["tag1"],
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_not_equal('4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     new_job['script_version'])
-  end
-
-  test "cannot reuse job with find_or_create but excluded version" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true,
-      minimum_script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-      exclude_script_versions: ["tag1"],
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_not_equal('4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     new_job['script_version'])
-  end
-
-  test "cannot reuse job when hash-like branch includes newer commit" do
-    check_new_job_created_from({job: {script_version: "738783"}},
-                               :previous_job_run_superseded_by_hash_branch)
-  end
-
   BASE_FILTERS = {
     'repository' => ['=', 'active/foo'],
     'script' => ['=', 'hash'],
@@ -384,217 +25,6 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
     hash.each_pair.map { |name, filter| [name] + filter }
   end
 
-  test "can reuse a Job based on filters" do
-    filters_hash = BASE_FILTERS.
-      merge('script_version' => ['in git', 'tag1'])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "master",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "can not reuse a Job based on filters" do
-    filters = filters_from_hash(BASE_FILTERS
-                                  .reject { |k| k == 'script_version' })
-    filters += [["script_version", "in git",
-                 "31ce37fe365b3dc204300a3e4c396ad333ed0556"],
-                ["script_version", "not in git", ["tag1"]]]
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "master",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters,
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "can not reuse a Job based on arbitrary filters" do
-    filters_hash = BASE_FILTERS.
-      merge("created_at" => ["<", "2010-01-01T00:00:00Z"])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "can reuse a Job with a Docker image" do
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-             runtime_constraints: {
-               docker_image: 'arvados/apitestfixture',
-             }
-           },
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "can reuse a Job with a Docker image hash filter" do
-    filters_hash = BASE_FILTERS.
-      merge("script_version" =>
-              ["=", "4fe459abe02d9b365932b8f5dc419439ab4e2577"],
-            "docker_image_locator" =>
-              ["in docker", links(:docker_image_collection_hash).name])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "reuse Job with Docker image repo+tag" do
-    filters_hash = BASE_FILTERS.
-      merge("script_version" =>
-              ["=", "4fe459abe02d9b365932b8f5dc419439ab4e2577"],
-            "docker_image_locator" =>
-              ["in docker", links(:docker_image_collection_tag2).name])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "new job with unknown Docker image filter" do
-    filters_hash = BASE_FILTERS.
-      merge("docker_image_locator" => ["in docker", "_nonesuchname_"])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    assert_not_equal(jobs(:previous_docker_job_run).uuid, new_job.uuid)
-  end
-
-  test "don't reuse job using older Docker image of same name" do
-    jobspec = {runtime_constraints: {
-        docker_image: "arvados/apitestfixture",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_ancient_docker_image_job_run)
-  end
-
-  test "reuse job with Docker image that has hash name" do
-    jobspec = {runtime_constraints: {
-        docker_image: "a" * 64,
-      }}
-    check_job_reused_from(jobspec, :previous_docker_job_run)
-  end
-
-  ["repository", "script"].each do |skip_key|
-    test "missing #{skip_key} filter raises an error" do
-      filters = filters_from_hash(BASE_FILTERS.reject { |k| k == skip_key })
-      post(:create, params: {
-             job: {
-               script: "hash",
-               script_version: "master",
-               repository: "active/foo",
-               script_parameters: {
-                 input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-                 an_integer: '1'
-               }
-             },
-             filters: filters,
-             find_or_create: true,
-           })
-      assert_includes(405..599, @response.code.to_i,
-                      "bad status code with missing #{skip_key} filter")
-    end
-  end
-
   test "find Job with script version range" do
     get :index, params: {
       filters: [["repository", "=", "active/foo"],
@@ -673,136 +103,4 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                     jobs(:previous_docker_job_run).uuid)
   end
 
-  JOB_SUBMIT_KEYS = [:script, :script_parameters, :script_version, :repository]
-  DEFAULT_START_JOB = :previous_job_run
-
-  def create_job_params(params, start_from=DEFAULT_START_JOB)
-    if not params.has_key?(:find_or_create)
-      params[:find_or_create] = true
-    end
-    job_attrs = params.delete(:job) || {}
-    start_job = jobs(start_from)
-    params[:job] = Hash[JOB_SUBMIT_KEYS.map do |key|
-                          [key, start_job.send(key)]
-                        end]
-    params[:job][:runtime_constraints] =
-      job_attrs.delete(:runtime_constraints) || {}
-    { arvados_sdk_version: :arvados_sdk_version,
-      docker_image_locator: :docker_image }.each do |method, constraint_key|
-      if constraint_value = start_job.send(method)
-        params[:job][:runtime_constraints][constraint_key] ||= constraint_value
-      end
-    end
-    params[:job].merge!(job_attrs)
-    params
-  end
-
-  def create_job_from(params, start_from)
-    post(:create, params: create_job_params(params, start_from))
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    new_job
-  end
-
-  def check_new_job_created_from(params, start_from=DEFAULT_START_JOB)
-    start_time = Time.now
-    new_job = create_job_from(params, start_from)
-    assert_operator(start_time, :<=, new_job.created_at)
-    new_job
-  end
-
-  def check_job_reused_from(params, start_from)
-    new_job = create_job_from(params, start_from)
-    assert_equal(jobs(start_from).uuid, new_job.uuid)
-  end
-
-  def check_errors_from(params, start_from=DEFAULT_START_JOB)
-    post(:create, params: create_job_params(params, start_from))
-    assert_includes(405..499, @response.code.to_i)
-    errors = json_response.fetch("errors", [])
-    assert(errors.any?, "no errors assigned from #{params}")
-    refute(errors.any? { |msg| msg =~ /^#<[A-Za-z]+: / },
-           "errors include raw exception: #{errors.inspect}")
-    errors
-  end
-
-  # 1de84a8 is on the b1 branch, after master's tip.
-  test "new job created from unsatisfiable minimum version filter" do
-    filters_hash = BASE_FILTERS.merge("script_version" => ["in git", "1de84a8"])
-    check_new_job_created_from(filters: filters_from_hash(filters_hash))
-  end
-
-  test "new job created from unsatisfiable minimum version parameter" do
-    check_new_job_created_from(minimum_script_version: "1de84a8")
-  end
-
-  test "new job created from unsatisfiable minimum version attribute" do
-    check_new_job_created_from(job: {minimum_script_version: "1de84a8"})
-  end
-
-  test "graceful error from nonexistent minimum version filter" do
-    filters_hash = BASE_FILTERS.merge("script_version" =>
-                                      ["in git", "__nosuchbranch__"])
-    errors = check_errors_from(filters: filters_from_hash(filters_hash))
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "graceful error from nonexistent minimum version parameter" do
-    errors = check_errors_from(minimum_script_version: "__nosuchbranch__")
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "graceful error from nonexistent minimum version attribute" do
-    errors = check_errors_from(job: {minimum_script_version: "__nosuchbranch__"})
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "don't reuse job with older Arvados SDK version specified by branch" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "master",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "don't reuse job with older Arvados SDK version specified by commit" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "ca68b24e51992e790f29df5cc4bc54ce1da4a1c2",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "don't reuse job with newer Arvados SDK version specified by commit" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "436637c87a1d2bdbf4b624008304064b6cf0e30c",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "reuse job from arvados_sdk_version git filters" do
-    prev_job = jobs(:previous_job_run_with_arvados_sdk_version)
-    filters_hash = BASE_FILTERS.
-      merge("arvados_sdk_version" => ["in git", "commit2"],
-            "docker_image_locator" => ["=", prev_job.docker_image_locator])
-    filters_hash.delete("script_version")
-    params = create_job_params(filters: filters_from_hash(filters_hash))
-    post(:create, params: params)
-    assert_response :success
-    assert_equal(prev_job.uuid, assigns(:object).uuid)
-  end
-
-  test "create new job because of arvados_sdk_version 'not in git' filters" do
-    filters_hash = BASE_FILTERS.reject { |k| k == "script_version" }
-    filters = filters_from_hash(filters_hash)
-    # Allow anything from the root commit, but before commit 2.
-    filters += [["arvados_sdk_version", "in git", "436637c8"],
-                ["arvados_sdk_version", "not in git", "00634b2b"]]
-    check_new_job_created_from(filters: filters)
-  end
 end
index 3803a0dc45d8928d16e7174fdcb55f4e618dfb04..9298f23d5436c0f7d9420cda89884b4e0401b6fa 100644 (file)
@@ -7,172 +7,6 @@ require 'helpers/git_test_helper'
 
 class Arvados::V1::JobsControllerTest < ActionController::TestCase
 
-  include GitTestHelper
-
-  test "submit a job" do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_nil new_job['uuid']
-    assert_not_nil new_job['script_version'].match(/^[0-9a-f]{40}$/)
-    assert_equal 0, new_job['priority']
-  end
-
-  test "normalize output and log uuids when creating job" do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        script_parameters: {},
-        repository: "active/foo",
-        started_at: Time.now,
-        finished_at: Time.now,
-        running: false,
-        success: true,
-        output: 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy',
-        log: 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = assigns(:object)
-    assert_equal 'd41d8cd98f00b204e9800998ecf8427e+0', new_job['log']
-    assert_equal 'd41d8cd98f00b204e9800998ecf8427e+0', new_job['output']
-    version = new_job['script_version']
-
-    # Make sure version doesn't get mangled by normalize
-    assert_not_nil version.match(/^[0-9a-f]{40}$/)
-    assert_equal 'master', json_response['supplied_script_version']
-  end
-
-  test "normalize output and log uuids when updating job" do
-    authorize_with :active
-
-    foobar_job = jobs(:foobar)
-
-    new_output = 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-    new_log = 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-    put :update, params: {
-      id: foobar_job['uuid'],
-      job: {
-        output: new_output,
-        log: new_log
-      }
-    }
-
-    updated_job = json_response
-    assert_not_equal foobar_job['log'], updated_job['log']
-    assert_not_equal new_log, updated_job['log']  # normalized during update
-    assert_equal new_log[0,new_log.rindex('+')], updated_job['log']
-    assert_not_equal foobar_job['output'], updated_job['output']
-    assert_not_equal new_output, updated_job['output']  # normalized during update
-    assert_equal new_output[0,new_output.rindex('+')], updated_job['output']
-  end
-
-  test "cancel a running job" do
-    # We need to verify that "cancel" creates a trigger file, so first
-    # let's make sure there is no stale trigger file.
-    begin
-      File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-    rescue Errno::ENOENT
-    end
-
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:running).uuid,
-      job: {
-        cancelled_at: 4.day.ago
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    job = JSON.parse(@response.body)
-    assert_not_nil job['uuid']
-    assert_not_nil job['cancelled_at']
-    assert_not_nil job['cancelled_by_user_uuid']
-    assert_not_nil job['cancelled_by_client_uuid']
-    assert_equal(true, Time.parse(job['cancelled_at']) > 1.minute.ago,
-                 'server should correct bogus cancelled_at ' +
-                 job['cancelled_at'])
-    assert_equal(true,
-                 File.exist?(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger),
-                 'trigger file should be created when job is cancelled')
-  end
-
-  [
-   [:put, :update, {job:{cancelled_at: Time.now}}, :success],
-   [:put, :update, {job:{cancelled_at: nil}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Cancelled'}}, :success],
-   [:put, :update, {job:{state: 'Queued'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Running'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Failed'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Complete'}}, :unprocessable_entity],
-   [:post, :cancel, {}, :success],
-  ].each do |http_method, action, params, expected_response|
-    test "cancelled job stays cancelled after #{[http_method, action, params].inspect}" do
-      # We need to verify that "cancel" creates a trigger file, so first
-      # let's make sure there is no stale trigger file.
-      begin
-        File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-      rescue Errno::ENOENT
-      end
-
-      authorize_with :active
-      self.send http_method, action, params: { id: jobs(:cancelled).uuid }.merge(params)
-      assert_response expected_response
-      if expected_response == :success
-        job = json_response
-        assert_not_nil job['cancelled_at'], "job cancelled again using #{[http_method, action, params].inspect} did not have cancelled_at value"
-        assert_equal 'Cancelled', job['state'], "cancelled job state changed when updated again using #{[http_method, action, params].inspect}"
-      end
-      # Verify database record still says Cancelled
-      assert_equal 'Cancelled', Job.find(jobs(:cancelled).id).state, 'job was un-cancelled'
-    end
-  end
-
-  test "cancelled job updated to any other state change results in error" do
-    # We need to verify that "cancel" creates a trigger file, so first
-    # let's make sure there is no stale trigger file.
-    begin
-      File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-    rescue Errno::ENOENT
-    end
-
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:running_cancelled).uuid,
-      job: {
-        cancelled_at: nil
-      }
-    }
-    assert_response 422
-  end
-
-  ['abc.py', 'hash.py'].each do |script|
-    test "update job script attribute to #{script} without failing script_version check" do
-      authorize_with :admin
-      put :update, params: {
-        id: jobs(:uses_nonexistent_script_version).uuid,
-        job: {
-          script: script
-        }
-      }
-      assert_response :success
-      resp = assigns(:object)
-      assert_equal jobs(:uses_nonexistent_script_version).script_version, resp['script_version']
-    end
-  end
-
   test "search jobs by uuid with >= query" do
     authorize_with :active
     get :index, params: {
@@ -331,52 +165,12 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_response 422
   end
 
-  test "finish a job" do
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:nearly_finished_job).uuid,
-      job: {
-        output: '551392cc37a317abf865b95f66f4ef94+101',
-        log: '9215de2a951a721f5f156bc08cf63ad7+93',
-        tasks_summary: {done: 1, running: 0, todo: 0, failed: 0},
-        success: true,
-        running: false,
-        finished_at: Time.now.to_s
-      }
-    }
-    assert_response :success
-  end
-
   [:spectator, :admin].each_with_index do |which_token, i|
     test "get job queue as #{which_token} user" do
       authorize_with which_token
       get :queue
       assert_response :success
-      assert_equal i, assigns(:objects).count
-    end
-  end
-
-  test "get job queue as with a = filter" do
-    authorize_with :admin
-    get :queue, params: { filters: [['script','=','foo']] }
-    assert_response :success
-    assert_equal ['foo'], assigns(:objects).collect(&:script).uniq
-    assert_equal 0, assigns(:objects)[0].queue_position
-  end
-
-  test "get job queue as with a != filter" do
-    authorize_with :admin
-    get :queue, params: { filters: [['script','!=','foo']] }
-    assert_response :success
-    assert_equal 0, assigns(:objects).count
-  end
-
-  [:spectator, :admin].each do |which_token|
-    test "get queue_size as #{which_token} user" do
-      authorize_with which_token
-      get :queue_size
-      assert_response :success
-      assert_equal 1, JSON.parse(@response.body)["queue_size"]
+      assert_equal 0, assigns(:objects).count
     end
   end
 
@@ -387,67 +181,6 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_equal([nodes(:busy).uuid], json_response["node_uuids"])
   end
 
-  test "job lock success" do
-    authorize_with :active
-    post :lock, params: {id: jobs(:queued).uuid}
-    assert_response :success
-    job = Job.where(uuid: jobs(:queued).uuid).first
-    assert_equal "Running", job.state
-  end
-
-  test "job lock conflict" do
-    authorize_with :active
-    post :lock, params: {id: jobs(:running).uuid}
-    assert_response 422 # invalid state transition
-  end
-
-  test 'reject invalid commit in remote repository' do
-    authorize_with :active
-    url = "http://localhost:1/fake/fake.git"
-    fetch_remote_from_local_repo url, :foo
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "abc123",
-        repository: url,
-        script_parameters: {}
-      }
-    }
-    assert_response 422
-  end
-
-  test 'tag remote commit in internal repository' do
-    authorize_with :active
-    url = "http://localhost:1/fake/fake.git"
-    fetch_remote_from_local_repo url, :foo
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: url,
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
-                 internal_tag(json_response['uuid']))
-  end
-
-  test 'tag local commit in internal repository' do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
-                 internal_tag(json_response['uuid']))
-  end
-
   test 'get job with components' do
     authorize_with :active
     get :show, params: {id: jobs(:running_job_with_components).uuid}
@@ -455,42 +188,4 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_not_nil json_response["components"]
     assert_equal ["component1", "component2"], json_response["components"].keys
   end
-
-  [
-    [:active, :success],
-    [:system_user, :success],
-    [:admin, 403],
-  ].each do |user, expected|
-    test "add components to job locked by active user as #{user} user and expect #{expected}" do
-      authorize_with user
-      put :update, params: {
-        id: jobs(:running).uuid,
-        job: {
-          components: {"component1" => "value1", "component2" => "value2"}
-        }
-      }
-      assert_response expected
-      if expected == :success
-        assert_not_nil json_response["components"]
-        keys = json_response["components"].keys
-        assert_equal ["component1", "component2"], keys
-        assert_equal "value1", json_response["components"][keys[0]]
-      end
-    end
-  end
-
-  test 'jobs.create disabled in config' do
-    Rails.configuration.API.DisabledAPIs = {"jobs.create"=>{},
-                                               "pipeline_instances.create"=>{}}
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response 404
-  end
 end
index a76151150fe873b950aaaf72bcd1ac403c1cbc54..e455354c11d7575eca838bc6de075d42f8052d04 100644 (file)
@@ -5,48 +5,4 @@
 require 'test_helper'
 
 class Arvados::V1::PipelineInstancesControllerTest < ActionController::TestCase
-
-  test 'create pipeline with components copied from template' do
-    authorize_with :active
-    post :create, params: {
-      pipeline_instance: {
-        pipeline_template_uuid: pipeline_templates(:two_part).uuid
-      }
-    }
-    assert_response :success
-    assert_equal(pipeline_templates(:two_part).components.to_json,
-                 assigns(:object).components.to_json)
-  end
-
-  test 'create pipeline with no template' do
-    authorize_with :active
-    post :create, params: {
-      pipeline_instance: {
-        components: {}
-      }
-    }
-    assert_response :success
-    assert_equal({}, assigns(:object).components)
-  end
-
-  [
-    true,
-    false
-  ].each do |cascade|
-    test "cancel a pipeline instance with cascade=#{cascade}" do
-      authorize_with :active
-      pi_uuid = pipeline_instances(:job_child_pipeline_with_components_at_level_2).uuid
-
-      post :cancel, params: {id: pi_uuid, cascade: cascade}
-      assert_response :success
-
-      pi = PipelineInstance.where(uuid: pi_uuid).first
-      assert_equal "Paused", pi.state
-
-      children = Job.where(uuid: ['zzzzz-8i9sb-job1atlevel3noc', 'zzzzz-8i9sb-job2atlevel3noc'])
-      children.each do |child|
-        assert_equal ("Cancelled" == child.state), cascade
-      end
-    end
-  end
 end
index 03189bdfeae50c808a48069338824772513ae541..59debc57605c397435c48c49b70255a5359f873f 100644 (file)
@@ -30,7 +30,7 @@ module GitTestHelper
     end
 
     base.teardown do
-      FileUtils.remove_entry Commit.cache_dir_base, true
+      FileUtils.remove_entry CommitsHelper.cache_dir_base, true
       FileUtils.mkdir_p @tmpdir
       system("tar", "-xC", @tmpdir.to_s, "-f", "test/test.git.tar")
     end
@@ -48,10 +48,10 @@ module GitTestHelper
     if fakeurl.is_a? Symbol
       fakeurl = 'file://' + repositories(fakeurl).server_path
     end
-    Commit.expects(:fetch_remote_repository).once.with do |gitdir, giturl|
+    CommitsHelper.expects(:fetch_remote_repository).once.with do |gitdir, giturl|
       if giturl == url
-        Commit.unstub(:fetch_remote_repository)
-        Commit.fetch_remote_repository gitdir, fakeurl
+        CommitsHelper.unstub(:fetch_remote_repository)
+        CommitsHelper.fetch_remote_repository gitdir, fakeurl
         true
       end
     end
diff --git a/services/api/test/integration/crunch_dispatch_test.rb b/services/api/test/integration/crunch_dispatch_test.rb
deleted file mode 100644 (file)
index 6ac1270..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'helpers/git_test_helper'
-
-class CrunchDispatchIntegrationTest < ActionDispatch::IntegrationTest
-  include GitTestHelper
-
-  fixtures :all
-
-  @@crunch_dispatch_pid = nil
-
-  def launch_crunch_dispatch
-    @@crunch_dispatch_pid = Process.fork {
-      ENV['PATH'] = ENV['HOME'] + '/arvados/services/crunch:' + ENV['PATH']
-      exec(ENV['HOME'] + '/arvados/services/api/script/crunch-dispatch.rb')
-    }
-  end
-
-  teardown do
-    if @@crunch_dispatch_pid
-      Process.kill "TERM", @@crunch_dispatch_pid
-      Process.wait
-      @@crunch_dispatch_pid = nil
-    end
-  end
-
-  test "job runs" do
-    post "/arvados/v1/jobs",
-      params: {
-        format: "json",
-        job: {
-          script: "log",
-          repository: "active/crunchdispatchtest",
-          script_version: "f35f99b7d32bac257f5989df02b9f12ee1a9b0d6",
-          script_parameters: {
-            input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-            an_integer: '1'
-          }
-        }
-      },
-      headers: auth(:admin)
-    assert_response :success
-  end
-end
index f5fb920b46d82e4e48bd06c5db1b145ebd5279b3..76d4fff59e95f3e4deeabdf9b326b677af613382 100644 (file)
@@ -5,87 +5,4 @@
 require 'test_helper'
 
 class JobsApiTest < ActionDispatch::IntegrationTest
-  fixtures :all
-
-  test "cancel job" do
-    post "/arvados/v1/jobs/#{jobs(:running).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:active).api_token}"}
-    assert_response :success
-    assert_equal "arvados#job", json_response['kind']
-    assert_not_nil json_response['cancelled_at']
-  end
-
-  test "cancel someone else's visible job" do
-    post "/arvados/v1/jobs/#{jobs(:runningbarbaz).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:spectator).api_token}"}
-    assert_response 403
-  end
-
-  test "cancel someone else's invisible job" do
-    post "/arvados/v1/jobs/#{jobs(:running).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:spectator).api_token}"}
-    assert_response 404
-  end
-
-  test "task qsequence values automatically increase monotonically" do
-    post_args = ["/arvados/v1/job_tasks",
-                 params: {job_task: {
-                     job_uuid: jobs(:running).uuid,
-                     sequence: 1,
-                   }},
-                 headers: auth(:active)]
-    last_qsequence = -1
-    (1..3).each do |task_num|
-      @response = nil
-      post(*post_args)
-      assert_response :success
-      qsequence = json_response["qsequence"]
-      assert_not_nil(qsequence, "task not assigned qsequence")
-      assert_operator(qsequence, :>, last_qsequence,
-                      "qsequence did not increase between tasks")
-      last_qsequence = qsequence
-    end
-  end
-
-  test 'get, delete, and get components again for job with components' do
-    authorize_with :active
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal ["component1", "component2"], json_response["components"].keys
-
-    # delete second component
-    put "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}", params: {
-      job: {
-        components: {"component1" => "zzzzz-8i9sb-jobuuid00000001"}
-      },
-      limit: 1000
-    }, headers: auth(:active)
-    assert_response 200
-
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal ["component1"], json_response["components"].keys
-
-    # delete all components
-    put "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}", params: {
-      job: {
-        components: nil
-      },
-      limit: 1000
-    }, headers: auth(:active)
-    assert_response 200
-
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal [], json_response["components"].keys
-  end
 end
index d4f7eba30265ca52d8006983665fdc17e37623b0..4d8f88248a85f25233e7a5a2813b12468e745f2d 100644 (file)
@@ -5,40 +5,4 @@
 require 'test_helper'
 
 class PipelineIntegrationTest < ActionDispatch::IntegrationTest
-  # These tests simulate the workflow of arv-run-pipeline-instance
-  # and other pipeline-running code.
-
-  def check_component_match(comp_key, comp_hash)
-    assert_response :success
-    built_json = json_response
-    built_component = built_json["components"][comp_key]
-    comp_hash.each_pair do |key, expected|
-      assert_equal(expected, built_component[key.to_s],
-                   "component's #{key} field changed")
-    end
-  end
-
-  test "creating a pipeline instance preserves required component parameters" do
-    comp_name = "test_component"
-    component = {
-      repository: "test_repo",
-      script: "test_script",
-      script_version: "test_refspec",
-      script_parameters: {},
-    }
-
-    post("/arvados/v1/pipeline_instances",
-      params: {
-        pipeline_instance: {
-          components: {comp_name => component}
-        }.to_json
-      },
-      headers: auth(:active))
-    check_component_match(comp_name, component)
-    pi_uuid = json_response["uuid"]
-
-    @response = nil
-    get("/arvados/v1/pipeline_instances/#{pi_uuid}", params: {}, headers: auth(:active))
-    check_component_match(comp_name, component)
-  end
 end
index 16d43e6f3c61b9fc99edbe4c580e9f90bb227323..f41c033b3918c2fea8b9dc0e088f534600d38ab7 100644 (file)
@@ -15,31 +15,10 @@ class SerializedEncodingTest < ActionDispatch::IntegrationTest
 
     human: {properties: {eye_color: 'gray'}},
 
-    job: {
-      repository: 'active/foo',
-      runtime_constraints: {docker_image: 'arvados/apitestfixture'},
-      script: 'hash',
-      script_version: 'master',
-      script_parameters: {pattern: 'foobar'},
-      tasks_summary: {todo: 0},
-    },
-
-    job_task: {parameters: {pattern: 'foo'}},
-
     link: {link_class: 'test', name: 'test', properties: {foo: :bar}},
 
     node: {info: {uptime: 1234}},
 
-    pipeline_instance: {
-      components: {"job1" => {parameters: {pattern: "xyzzy"}}},
-      components_summary: {todo: 0},
-      properties: {test: true},
-    },
-
-    pipeline_template: {
-      components: {"job1" => {parameters: {pattern: "xyzzy"}}},
-    },
-
     specimen: {properties: {eye_color: 'meringue'}},
 
     trait: {properties: {eye_color: 'brown'}},
index c5d72c3bfea7ef21cc93a5a8d88db4f564008601..1c772de0470771f8768b7b4c24f2df9c598361a4 100644 (file)
@@ -22,7 +22,7 @@ class CommitTest < ActiveSupport::TestCase
   test 'find_commit_range does not bypass permissions' do
     authorize_with :inactive
     assert_raises ArgumentError do
-      Commit.find_commit_range 'foo', nil, 'master', []
+      CommitsHelper::find_commit_range 'foo', nil, 'master', []
     end
   end
 
@@ -41,9 +41,9 @@ class CommitTest < ActiveSupport::TestCase
   ].each do |url|
     test "find_commit_range uses fetch_remote_repository to get #{url}" do
       fake_gitdir = repositories(:foo).server_path
-      Commit.expects(:cache_dir_for).once.with(url).returns fake_gitdir
-      Commit.expects(:fetch_remote_repository).once.with(fake_gitdir, url).returns true
-      c = Commit.find_commit_range url, nil, 'master', []
+      CommitsHelper::expects(:cache_dir_for).once.with(url).returns fake_gitdir
+      CommitsHelper::expects(:fetch_remote_repository).once.with(fake_gitdir, url).returns true
+      c = CommitsHelper::find_commit_range url, nil, 'master', []
       refute_empty c
     end
   end
@@ -57,9 +57,9 @@ class CommitTest < ActiveSupport::TestCase
    'github.com/curoverse/arvados.git',
   ].each do |url|
     test "find_commit_range skips fetch_remote_repository for #{url}" do
-      Commit.expects(:fetch_remote_repository).never
+      CommitsHelper::expects(:fetch_remote_repository).never
       assert_raises ArgumentError do
-        Commit.find_commit_range url, nil, 'master', []
+        CommitsHelper::find_commit_range url, nil, 'master', []
       end
     end
   end
@@ -67,12 +67,12 @@ class CommitTest < ActiveSupport::TestCase
   test 'fetch_remote_repository does not leak commits across repositories' do
     url = "http://localhost:1/fake/fake.git"
     fetch_remote_from_local_repo url, :foo
-    c = Commit.find_commit_range url, nil, 'master', []
+    c = CommitsHelper::find_commit_range url, nil, 'master', []
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57'], c
 
     url = "http://localhost:2/fake/fake.git"
     fetch_remote_from_local_repo url, 'file://' + File.expand_path('../../.git', Rails.root)
-    c = Commit.find_commit_range url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
+    c = CommitsHelper::find_commit_range url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
     assert_equal [], c
   end
 
@@ -82,7 +82,7 @@ class CommitTest < ActiveSupport::TestCase
     IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
     assert_match(/^fatal: /, IO.read("|#{gitint} show testtag 2>&1"))
     refute $?.success?
-    Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
+    CommitsHelper::tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
     assert_match(/^commit 31ce37f/, IO.read("|#{gitint} show testtag"))
     assert $?.success?
   end
@@ -106,7 +106,7 @@ class CommitTest < ActiveSupport::TestCase
       must_pipe("git rm bar")
       must_pipe("git -c user.email=x@x -c user.name=X commit -m -")
     end
-    Commit.tag_in_internal_repository 'active/foo', sha1, tag
+    CommitsHelper::tag_in_internal_repository 'active/foo', sha1, tag
     gitint = "git --git-dir #{Rails.configuration.Containers.JobsAPI.GitInternalDir.shellescape}"
     assert_match(/^commit /, IO.read("|#{gitint} show #{tag.shellescape}"))
     assert $?.success?
@@ -122,7 +122,7 @@ class CommitTest < ActiveSupport::TestCase
       sha1 = must_pipe("git log -n1 --format=%H").strip
       must_pipe("git reset --hard HEAD^")
     end
-    Commit.tag_in_internal_repository 'active/foo', sha1, tag
+    CommitsHelper::tag_in_internal_repository 'active/foo', sha1, tag
     gitint = "git --git-dir #{Rails.configuration.Containers.JobsAPI.GitInternalDir.shellescape}"
     assert_match(/^commit /, IO.read("|#{gitint} show #{tag.shellescape}"))
     assert $?.success?
@@ -141,50 +141,50 @@ class CommitTest < ActiveSupport::TestCase
 
   test "find_commit_range min_version prefers commits over branch names" do
     assert_equal([COMMIT_BRANCH_NAME],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           COMMIT_BRANCH_NAME, nil, nil))
   end
 
   test "find_commit_range max_version prefers commits over branch names" do
     assert_equal([COMMIT_BRANCH_NAME],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           nil, COMMIT_BRANCH_NAME, nil))
   end
 
   test "find_commit_range min_version with short branch name" do
     assert_equal([SHORT_BRANCH_COMMIT_2],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           SHORT_COMMIT_BRANCH_NAME, nil, nil))
   end
 
   test "find_commit_range max_version with short branch name" do
     assert_equal([SHORT_BRANCH_COMMIT_2],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           nil, SHORT_COMMIT_BRANCH_NAME, nil))
   end
 
   test "find_commit_range min_version with disambiguated branch name" do
     assert_equal([COMMIT_BRANCH_COMMIT_2],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           "heads/#{COMMIT_BRANCH_NAME}",
                                           nil, nil))
   end
 
   test "find_commit_range max_version with disambiguated branch name" do
     assert_equal([COMMIT_BRANCH_COMMIT_2],
-                 Commit.find_commit_range("active/shabranchnames", nil,
+                 CommitsHelper::find_commit_range("active/shabranchnames", nil,
                                           "heads/#{COMMIT_BRANCH_NAME}", nil))
   end
 
   test "find_commit_range min_version with unambiguous short name" do
     assert_equal([COMMIT_BRANCH_NAME],
-                 Commit.find_commit_range("active/shabranchnames",
+                 CommitsHelper::find_commit_range("active/shabranchnames",
                                           COMMIT_BRANCH_NAME[0..-2], nil, nil))
   end
 
   test "find_commit_range max_version with unambiguous short name" do
     assert_equal([COMMIT_BRANCH_NAME],
-                 Commit.find_commit_range("active/shabranchnames", nil,
+                 CommitsHelper::find_commit_range("active/shabranchnames", nil,
                                           COMMIT_BRANCH_NAME[0..-2], nil))
   end
 
@@ -192,77 +192,77 @@ class CommitTest < ActiveSupport::TestCase
     authorize_with :active
 
     # single
-    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = CommitsHelper::find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_branch1" do
-    a = Commit.find_commit_range('active/foo', nil, 'master', nil)
+    a = CommitsHelper::find_commit_range('active/foo', nil, 'master', nil)
     assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
 
     #test "test_branch2" do
-    a = Commit.find_commit_range('active/foo', nil, 'b1', nil)
+    a = CommitsHelper::find_commit_range('active/foo', nil, 'b1', nil)
     assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
 
     #test "test_branch3" do
-    a = Commit.find_commit_range('active/foo', nil, 'HEAD', nil)
+    a = CommitsHelper::find_commit_range('active/foo', nil, 'HEAD', nil)
     assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
 
     #test "test_single_revision_repo" do
-    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = CommitsHelper::find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-    a = Commit.find_commit_range('arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    a = CommitsHelper::find_commit_range('arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
     assert_equal [], a
 
     #test "test_multi_revision" do
     # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
+    a = CommitsHelper::find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_tag" do
     # complains "fatal: ambiguous argument 'tag1': unknown revision or path
     # not in the working tree."
-    a = Commit.find_commit_range('active/foo', 'tag1', 'master', nil)
+    a = CommitsHelper::find_commit_range('active/foo', 'tag1', 'master', nil)
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
 
     #test "test_multi_revision_exclude" do
-    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
+    a = CommitsHelper::find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     #test "test_multi_revision_tagged_exclude" do
     # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
+    a = CommitsHelper::find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
     assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
 
     Dir.mktmpdir do |touchdir|
       # invalid input to maximum
-      a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
+      a = CommitsHelper::find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to maximum
-      a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
+      a = CommitsHelper::find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
-      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      a = CommitsHelper::find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
-      a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      a = CommitsHelper::find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
+      a = CommitsHelper::find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
+      a = CommitsHelper::find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
       assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
     end
diff --git a/services/api/test/unit/crunch_dispatch_test.rb b/services/api/test/unit/crunch_dispatch_test.rb
deleted file mode 100644 (file)
index 3a8f90a..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'crunch_dispatch'
-require 'helpers/git_test_helper'
-
-class CrunchDispatchTest < ActiveSupport::TestCase
-  include GitTestHelper
-
-  test 'choose cheaper nodes first' do
-    act_as_system_user do
-      # Replace test fixtures with a set suitable for testing dispatch
-      Node.destroy_all
-
-      # Idle nodes with different prices
-      [['compute1', 3.20, 32],
-       ['compute2', 1.60, 16],
-       ['compute3', 0.80, 8]].each do |hostname, price, cores|
-        Node.create!(hostname: hostname,
-                     info: {
-                       'slurm_state' => 'idle',
-                     },
-                     properties: {
-                       'cloud_node' => {
-                         'price' => price,
-                       },
-                       'total_cpu_cores' => cores,
-                       'total_ram_mb' => cores*1024,
-                       'total_scratch_mb' => cores*10000,
-                     })
-      end
-
-      # Node with no price information
-      Node.create!(hostname: 'compute4',
-                   info: {
-                     'slurm_state' => 'idle',
-                   },
-                   properties: {
-                     'total_cpu_cores' => 8,
-                     'total_ram_mb' => 8192,
-                     'total_scratch_mb' => 80000,
-                   })
-
-      # Cheap but busy node
-      Node.create!(hostname: 'compute5',
-                   info: {
-                     'slurm_state' => 'alloc',
-                   },
-                   properties: {
-                     'cloud_node' => {
-                       'price' => 0.10,
-                     },
-                     'total_cpu_cores' => 32,
-                     'total_ram_mb' => 32768,
-                     'total_scratch_mb' => 320000,
-                   })
-    end
-
-    dispatch = CrunchDispatch.new
-    [[1, 16384, ['compute2']],
-     [2, 16384, ['compute2', 'compute1']],
-     [2, 8000, ['compute4', 'compute3']],
-    ].each do |min_nodes, min_ram, expect_nodes|
-      job = Job.new(uuid: 'zzzzz-8i9sb-382lhiizavzhqlp',
-                    runtime_constraints: {
-                      'min_nodes' => min_nodes,
-                      'min_ram_mb_per_node' => min_ram,
-                    })
-      nodes = dispatch.nodes_available_for_job_now job
-      assert_equal expect_nodes, nodes
-    end
-  end
-
-  test 'respond to TERM' do
-    lockfile = Rails.root.join 'tmp', 'dispatch.lock'
-    ENV['CRUNCH_DISPATCH_LOCKFILE'] = lockfile.to_s
-    begin
-      pid = Process.fork do
-        begin
-          dispatch = CrunchDispatch.new
-          dispatch.stubs(:did_recently).returns true
-          dispatch.run []
-        ensure
-          Process.exit!
-        end
-      end
-      assert_with_timeout 5, "Dispatch did not lock #{lockfile}" do
-        !can_lock(lockfile)
-      end
-    ensure
-      Process.kill("TERM", pid)
-    end
-    assert_with_timeout 20, "Dispatch did not unlock #{lockfile}" do
-      can_lock(lockfile)
-    end
-  end
-
-  test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
-    ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "none"
-    act_as_system_user do
-      j = Job.create(repository: 'active/foo',
-                     script: 'hash',
-                     script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     script_parameters: {})
-      ok = false
-      Open3.expects(:popen3).at_least_once.with do |*args|
-        if args.index(j.uuid)
-          ok = ((i = args.index '--cgroup-root') and
-                (args[i+1] == '/path/to/cgroup'))
-        end
-        true
-      end.raises(StandardError.new('all is well'))
-      dispatch = CrunchDispatch.new
-      dispatch.parse_argv ['--jobs']
-      dispatch.refresh_todo
-      dispatch.start_jobs
-      assert ok
-    end
-  end
-
-  def assert_with_timeout timeout, message
-    t = 0
-    while (t += 0.1) < timeout
-      if yield
-        return
-      end
-      sleep 0.1
-    end
-    assert false, message + " (waited #{timeout} seconds)"
-  end
-
-  def can_lock lockfile
-    lockfile.open(File::RDWR|File::CREAT, 0644) do |f|
-      return f.flock(File::LOCK_EX|File::LOCK_NB)
-    end
-  end
-
-  test 'rate limit of partial line segments' do
-    act_as_system_user do
-      Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod = 1
-
-      job = {}
-      job[:bytes_logged] = 0
-      job[:log_throttle_bytes_so_far] = 0
-      job[:log_throttle_lines_so_far] = 0
-      job[:log_throttle_bytes_skipped] = 0
-      job[:log_throttle_is_open] = true
-      job[:log_throttle_partial_line_last_at] = Time.new(0)
-      job[:log_throttle_first_partial_line] = true
-
-      dispatch = CrunchDispatch.new
-
-      line = "first log line"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "first log line", line
-      assert_equal 1, job[:log_throttle_lines_so_far]
-
-      # first partial line segment is skipped and counted towards skipped lines
-      now = Time.now.strftime('%Y-%m-%d-%H:%M:%S')
-      line = "#{now} localhost 100 0 stderr [...] this is first partial line segment [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_includes line, "Rate-limiting partial segments of long lines", line
-      assert_equal 2, job[:log_throttle_lines_so_far]
-
-      # next partial line segment within throttle interval is skipped but not counted towards skipped lines
-      line = "#{now} localhost 100 0 stderr [...] second partial line segment within the interval [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal false, limit
-      assert_equal 2, job[:log_throttle_lines_so_far]
-
-      # next partial line after interval is counted towards skipped lines
-      sleep(1)
-      line = "#{now} localhost 100 0 stderr [...] third partial line segment after the interval [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal false, limit
-      assert_equal 3, job[:log_throttle_lines_so_far]
-
-      # this is not a valid line segment
-      line = "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment", line
-      assert_equal 4, job[:log_throttle_lines_so_far]
-
-      # this also is not a valid line segment
-      line = "#{now} localhost 100 0 stderr does not start correctly but ends with [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "#{now} localhost 100 0 stderr does not start correctly but ends with [...]", line
-      assert_equal 5, job[:log_throttle_lines_so_far]
-    end
-  end
-
-  test 'scancel orphaned job nodes' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "slurm_immediate"
-    act_as_system_user do
-      dispatch = CrunchDispatch.new
-
-      squeue_resp = IO.popen("echo zzzzz-8i9sb-pshmckwoma9plh7\necho thisisnotvalidjobuuid\necho zzzzz-8i9sb-4cf0abc123e809j\necho zzzzz-dz642-o04e3r651turtdr\n")
-      scancel_resp = IO.popen("true")
-
-      IO.expects(:popen).
-        with(['squeue', '-a', '-h', '-o', '%j']).
-        returns(squeue_resp)
-
-      IO.expects(:popen).
-        with(dispatch.sudo_preface + ['scancel', '-n', 'zzzzz-8i9sb-4cf0abc123e809j']).
-        returns(scancel_resp)
-
-      dispatch.check_orphaned_slurm_jobs
-    end
-  end
-end
diff --git a/services/api/test/unit/fail_jobs_test.rb b/services/api/test/unit/fail_jobs_test.rb
deleted file mode 100644 (file)
index 304335c..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'crunch_dispatch'
-
-class FailJobsTest < ActiveSupport::TestCase
-  include DbCurrentTime
-
-  BOOT_TIME = 1448378837
-
-  setup do
-    @job = {}
-    act_as_user users(:admin) do
-      @job[:before_reboot] = Job.create!(state: 'Running',
-                                         running: true,
-                                         started_at: Time.at(BOOT_TIME - 300))
-      @job[:after_reboot] = Job.create!(state: 'Running',
-                                        running: true,
-                                        started_at: Time.at(BOOT_TIME + 300))
-      @job[:complete] = Job.create!(state: 'Running',
-                                    running: true,
-                                    started_at: Time.at(BOOT_TIME - 300))
-      @job[:complete].update_attributes(state: 'Complete')
-      @job[:complete].update_attributes(finished_at: Time.at(BOOT_TIME + 100))
-      @job[:queued] = jobs(:queued)
-
-      @job.values.each do |job|
-        # backdate timestamps
-        Job.where(uuid: job.uuid).
-          update_all(created_at: Time.at(BOOT_TIME - 330),
-                     modified_at: (job.finished_at ||
-                                   job.started_at ||
-                                   Time.at(BOOT_TIME - 300)))
-      end
-    end
-    @dispatch = CrunchDispatch.new
-    @test_start_time = db_current_time
-  end
-
-  test 'cancel slurm jobs' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "slurm_immediate"
-    Rails.configuration.Containers.JobsAPI.CrunchJobUser = 'foobar'
-    fake_squeue = IO.popen("echo #{@job[:before_reboot].uuid}")
-    fake_scancel = IO.popen("true")
-    IO.expects(:popen).
-      with(['squeue', '-a', '-h', '-o', '%j']).
-      returns(fake_squeue)
-    IO.expects(:popen).
-      with(includes('sudo', '-u', 'foobar', 'scancel', '-n', @job[:before_reboot].uuid)).
-      returns(fake_scancel)
-    @dispatch.fail_jobs(before: Time.at(BOOT_TIME).to_s)
-    assert_end_states
-  end
-
-  test 'use reboot time' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = nil
-    @dispatch.expects(:open).once.with('/proc/stat').
-      returns open(Rails.root.join('test/fixtures/files/proc_stat'))
-    @dispatch.fail_jobs(before: 'reboot')
-    assert_end_states
-  end
-
-  test 'command line help' do
-    cmd = Rails.root.join('script/fail-jobs.rb').to_s
-    assert_match(/Options:.*--before=/m, File.popen([cmd, '--help']).read)
-  end
-
-  protected
-
-  def assert_end_states
-    @job.values.map(&:reload)
-    assert_equal 'Failed', @job[:before_reboot].state
-    assert_equal false, @job[:before_reboot].running
-    assert_equal false, @job[:before_reboot].success
-    assert_operator @job[:before_reboot].finished_at, :>=, @test_start_time
-    assert_operator @job[:before_reboot].finished_at, :<=, db_current_time
-    assert_equal 'Running', @job[:after_reboot].state
-    assert_equal 'Complete', @job[:complete].state
-    assert_equal 'Queued', @job[:queued].state
-  end
-end
index 05a5d21a24b869f39034a874361df0c76e9e9149..36a0e723f26dd318157af57ce4c250d0fb605887 100644 (file)
@@ -5,16 +5,5 @@
 require 'test_helper'
 
 class JobTaskTest < ActiveSupport::TestCase
-  test "new tasks get an assigned qsequence" do
-    set_user_from_auth :active
-    task = JobTask.create
-    assert_not_nil task.qsequence
-    assert_operator(task.qsequence, :>=, 0)
-  end
 
-  test "assigned qsequence is not overwritten" do
-    set_user_from_auth :active
-    task = JobTask.create!(qsequence: 99)
-    assert_equal(99, task.qsequence)
-  end
 end
index 764aac3e4734dc94d6cdbe66b03f39305df51d1c..0e8cc48538e0f00acc0650d80910774876bc7822 100644 (file)
@@ -104,16 +104,6 @@ class JobTest < ActiveSupport::TestCase
     end
   end
 
-  test "create a job with a disambiguated script_version branch name" do
-    job = Job.
-      new(script: "testscript",
-          script_version: "heads/7387838c69a21827834586cc42b467ff6c63293b",
-          repository: "active/shabranchnames",
-          script_parameters: {})
-    assert(job.save)
-    assert_equal("abec49829bf1758413509b7ffcab32a771b71e81", job.script_version)
-  end
-
   test "locate a Docker image with a partial hash" do
     image_hash = links(:docker_image_collection_hash).name[0..24]
     job = Job.new job_attrs(runtime_constraints:
@@ -178,178 +168,6 @@ class JobTest < ActiveSupport::TestCase
     check_modification_prohibited(docker_image_locator: BAD_COLLECTION)
   end
 
-  [
-   {script_parameters: ""},
-   {script_parameters: []},
-   {script_parameters: {["foo"] => ["bar"]}},
-   {runtime_constraints: ""},
-   {runtime_constraints: []},
-   {tasks_summary: ""},
-   {tasks_summary: []},
-  ].each do |invalid_attrs|
-    test "validation failures set error messages: #{invalid_attrs.to_json}" do
-      # Ensure valid_attrs doesn't produce errors -- otherwise we will
-      # not know whether errors reported below are actually caused by
-      # invalid_attrs.
-      Job.new(job_attrs).save!
-
-      err = assert_raises(ArgumentError) do
-        Job.new(job_attrs(invalid_attrs)).save!
-      end
-      assert_match /parameters|constraints|summary/, err.message
-    end
-  end
-
-  test "invalid script_version" do
-    invalid = {
-      script_version: "no/branch/could/ever/possibly/have/this/name",
-    }
-    err = assert_raises(ActiveRecord::RecordInvalid) do
-      Job.new(job_attrs(invalid)).save!
-    end
-    assert_match /Script version .* does not resolve to a commit/, err.message
-  end
-
-  [
-    # Each test case is of the following format
-    # Array of parameters where each parameter is of the format:
-    #  attr name to be changed, attr value, and array of expectations (where each expectation is an array)
-    [['running', false, [['state', 'Queued']]]],
-    [['state', 'Running', [['started_at', 'not_nil']]]],
-    [['is_locked_by_uuid', 'use_current_user_uuid', [['state', 'Queued']]], ['state', 'Running', [['running', true], ['started_at', 'not_nil'], ['success', 'nil']]]],
-    [['running', false, [['state', 'Queued']]], ['state', 'Complete', [['success', true]]]],
-    [['running', true, [['state', 'Running']]], ['cancelled_at', Time.now, [['state', 'Cancelled']]]],
-    [['running', true, [['state', 'Running']]], ['state', 'Cancelled', [['cancelled_at', 'not_nil']]]],
-    [['running', true, [['state', 'Running']]], ['success', true, [['state', 'Complete']]]],
-    [['running', true, [['state', 'Running']]], ['success', false, [['state', 'Failed']]]],
-    [['running', true, [['state', 'Running']]], ['state', 'Complete', [['success', true],['finished_at', 'not_nil']]]],
-    [['running', true, [['state', 'Running']]], ['state', 'Failed', [['success', false],['finished_at', 'not_nil']]]],
-    [['cancelled_at', Time.now, [['state', 'Cancelled']]], ['success', false, [['state', 'Cancelled'],['finished_at', 'nil'], ['cancelled_at', 'not_nil']]]],
-    [['cancelled_at', Time.now, [['state', 'Cancelled'],['running', false]]], ['success', true, [['state', 'Cancelled'],['running', false],['finished_at', 'nil'],['cancelled_at', 'not_nil']]]],
-    # potential migration cases
-    [['state', nil, [['state', 'Queued']]]],
-    [['state', nil, [['state', 'Queued']]], ['cancelled_at', Time.now, [['state', 'Cancelled']]]],
-    [['running', true, [['state', 'Running']]], ['state', nil, [['state', 'Running']]]],
-  ].each do |parameters|
-    test "verify job status #{parameters}" do
-      job = Job.create! job_attrs
-      assert_equal 'Queued', job.state, "job.state"
-
-      parameters.each do |parameter|
-        expectations = parameter[2]
-        if 'use_current_user_uuid' == parameter[1]
-          parameter[1] = Thread.current[:user].uuid
-        end
-
-        if expectations.instance_of? Array
-          job[parameter[0]] = parameter[1]
-          assert_equal true, job.save, job.errors.full_messages.to_s
-          expectations.each do |expectation|
-            if expectation[1] == 'not_nil'
-              assert_not_nil job[expectation[0]], expectation[0]
-            elsif expectation[1] == 'nil'
-              assert_nil job[expectation[0]], expectation[0]
-            else
-              assert_equal expectation[1], job[expectation[0]], expectation[0]
-            end
-          end
-        else
-          raise 'I do not know how to handle this expectation'
-        end
-      end
-    end
-  end
-
-  test "Test job state changes" do
-    all = ["Queued", "Running", "Complete", "Failed", "Cancelled"]
-    valid = {"Queued" => all, "Running" => ["Complete", "Failed", "Cancelled"]}
-    all.each do |start|
-      all.each do |finish|
-        if start != finish
-          job = Job.create! job_attrs(state: start)
-          assert_equal start, job.state
-          job.state = finish
-          job.save
-          job.reload
-          if valid[start] and valid[start].include? finish
-            assert_equal finish, job.state
-          else
-            assert_equal start, job.state
-          end
-        end
-      end
-    end
-  end
-
-  test "Test job locking" do
-    set_user_from_auth :active_trustedclient
-    job = Job.create! job_attrs
-
-    assert_equal "Queued", job.state
-
-    # Should be able to lock successfully
-    job.lock current_user.uuid
-    assert_equal "Running", job.state
-
-    assert_raises ArvadosModel::AlreadyLockedError do
-      # Can't lock it again
-      job.lock current_user.uuid
-    end
-    job.reload
-    assert_equal "Running", job.state
-
-    set_user_from_auth :project_viewer
-    assert_raises ArvadosModel::AlreadyLockedError do
-      # Can't lock it as a different user either
-      job.lock current_user.uuid
-    end
-    job.reload
-    assert_equal "Running", job.state
-
-    assert_raises ArvadosModel::PermissionDeniedError do
-      # Can't update fields as a different user
-      job.update_attributes(state: "Failed")
-    end
-    job.reload
-    assert_equal "Running", job.state
-
-
-    set_user_from_auth :active_trustedclient
-
-    # Can update fields as the locked_by user
-    job.update_attributes(state: "Failed")
-    assert_equal "Failed", job.state
-  end
-
-  test "admin user can cancel a running job despite lock" do
-    set_user_from_auth :active_trustedclient
-    job = Job.create! job_attrs
-    job.lock current_user.uuid
-    assert_equal Job::Running, job.state
-
-    set_user_from_auth :spectator
-    assert_raises do
-      job.update_attributes!(state: Job::Cancelled)
-    end
-
-    set_user_from_auth :admin
-    job.reload
-    assert_equal Job::Running, job.state
-    job.update_attributes!(state: Job::Cancelled)
-    assert_equal Job::Cancelled, job.state
-  end
-
-  test "verify job queue position" do
-    job1 = Job.create! job_attrs
-    assert_equal 'Queued', job1.state, "Incorrect job state for newly created job1"
-
-    job2 = Job.create! job_attrs
-    assert_equal 'Queued', job2.state, "Incorrect job state for newly created job2"
-
-    assert_not_nil job1.queue_position, "Expected non-nil queue position for job1"
-    assert_not_nil job2.queue_position, "Expected non-nil queue position for job2"
-  end
-
   SDK_MASTER = "ca68b24e51992e790f29df5cc4bc54ce1da4a1c2"
   SDK_TAGGED = "00634b2b8a492d6f121e3cf1d6587b821136a9a7"
 
@@ -370,128 +188,6 @@ class JobTest < ActiveSupport::TestCase
     end
   end
 
-  { "master" => SDK_MASTER,
-    "commit2" => SDK_TAGGED,
-    SDK_TAGGED[0, 8] => SDK_TAGGED,
-    "__nonexistent__" => nil,
-  }.each_pair do |search, commit_hash|
-    test "creating job with SDK version '#{search}'" do
-      check_job_sdk_version(commit_hash) do
-        Job.new(job_attrs(sdk_constraint(search)))
-      end
-    end
-
-    test "updating job from no SDK to version '#{search}'" do
-      job = Job.create!(job_attrs)
-      assert_nil job.arvados_sdk_version
-      check_job_sdk_version(commit_hash) do
-        job.runtime_constraints = sdk_constraint(search)[:runtime_constraints]
-        job
-      end
-    end
-
-    test "updating job from SDK version 'master' to '#{search}'" do
-      job = Job.create!(job_attrs(sdk_constraint("master")))
-      assert_equal(SDK_MASTER, job.arvados_sdk_version)
-      check_job_sdk_version(commit_hash) do
-        job.runtime_constraints = sdk_constraint(search)[:runtime_constraints]
-        job
-      end
-    end
-  end
-
-  test "clear the SDK version" do
-    job = Job.create!(job_attrs(sdk_constraint("master")))
-    assert_equal(SDK_MASTER, job.arvados_sdk_version)
-    job.runtime_constraints = {}
-    assert(job.valid?, "job invalid after clearing SDK version")
-    assert_nil(job.arvados_sdk_version)
-  end
-
-  test "job with SDK constraint, without Docker image is invalid" do
-    sdk_attrs = sdk_constraint("master")
-    sdk_attrs[:runtime_constraints].delete("docker_image")
-    job = Job.create(job_attrs(sdk_attrs))
-    refute(job.valid?, "Job valid with SDK version, without Docker image")
-    sdk_errors = job.errors.messages[:arvados_sdk_version] || []
-    refute_empty(sdk_errors.grep(/\bDocker\b/),
-                 "no Job SDK errors mention that Docker is required")
-  end
-
-  test "invalid to clear Docker image constraint when SDK constraint exists" do
-    job = Job.create!(job_attrs(sdk_constraint("master")))
-    job.runtime_constraints.delete("docker_image")
-    refute(job.valid?,
-           "Job with SDK constraint valid after clearing Docker image")
-  end
-
-  test "use migrated docker image if requesting old-format image by tag" do
-    Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}}
-    add_docker19_migration_link
-    job = Job.create!(
-      job_attrs(
-        script: 'foo',
-        runtime_constraints: {
-          'docker_image' => links(:docker_image_collection_tag).name}))
-    assert(job.valid?)
-    assert_equal(job.docker_image_locator, collections(:docker_image_1_12).portable_data_hash)
-  end
-
-  test "use migrated docker image if requesting old-format image by pdh" do
-    Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}}
-    add_docker19_migration_link
-    job = Job.create!(
-      job_attrs(
-        script: 'foo',
-        runtime_constraints: {
-          'docker_image' => collections(:docker_image).portable_data_hash}))
-    assert(job.valid?)
-    assert_equal(job.docker_image_locator, collections(:docker_image_1_12).portable_data_hash)
-  end
-
-  [[:docker_image, :docker_image, :docker_image_1_12],
-   [:docker_image_1_12, :docker_image, :docker_image_1_12],
-   [:docker_image, :docker_image_1_12, :docker_image_1_12],
-   [:docker_image_1_12, :docker_image_1_12, :docker_image_1_12],
-  ].each do |existing_image, request_image, expect_image|
-    test "if a #{existing_image} job exists, #{request_image} yields #{expect_image} after migration" do
-      Rails.configuration.Containers.SupportedDockerImageFormats = {'v1'=>{}}
-
-      if existing_image == :docker_image
-        oldjob = Job.create!(
-          job_attrs(
-            script: 'foobar1',
-            runtime_constraints: {
-              'docker_image' => collections(existing_image).portable_data_hash}))
-        oldjob.reload
-        assert_equal(oldjob.docker_image_locator,
-                     collections(existing_image).portable_data_hash)
-      elsif existing_image == :docker_image_1_12
-        assert_raises(ActiveRecord::RecordInvalid,
-                      "Should not resolve v2 image when only v1 is supported") do
-        oldjob = Job.create!(
-          job_attrs(
-            script: 'foobar1',
-            runtime_constraints: {
-              'docker_image' => collections(existing_image).portable_data_hash}))
-        end
-      end
-
-      Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}}
-      add_docker19_migration_link
-
-      # Check that both v1 and v2 images get resolved to v2.
-      newjob = Job.create!(
-        job_attrs(
-          script: 'foobar1',
-          runtime_constraints: {
-            'docker_image' => collections(request_image).portable_data_hash}))
-      newjob.reload
-      assert_equal(newjob.docker_image_locator,
-                   collections(expect_image).portable_data_hash)
-    end
-  end
-
   test "can't create job with SDK version assigned directly" do
     check_creation_prohibited(arvados_sdk_version: SDK_MASTER)
   end
@@ -500,46 +196,6 @@ class JobTest < ActiveSupport::TestCase
     check_modification_prohibited(arvados_sdk_version: SDK_MASTER)
   end
 
-  test "job validation fails when collection uuid found in script_parameters" do
-    bad_params = {
-      script_parameters: {
-        'input' => {
-          'param1' => 'the collection uuid zzzzz-4zz18-012345678901234'
-        }
-      }
-    }
-    assert_raises(ActiveRecord::RecordInvalid,
-                  "created job with a collection uuid in script_parameters") do
-      Job.create!(job_attrs(bad_params))
-    end
-  end
-
-  test "job validation succeeds when no collection uuid in script_parameters" do
-    good_params = {
-      script_parameters: {
-        'arg1' => 'foo',
-        'arg2' => [ 'bar', 'baz' ],
-        'arg3' => {
-          'a' => 1,
-          'b' => [2, 3, 4],
-        }
-      }
-    }
-    job = Job.create!(job_attrs(good_params))
-    assert job.valid?
-  end
-
-  test 'update job uuid tag in internal.git when version changes' do
-    authorize_with :active
-    j = jobs :queued
-    j.update_attributes repository: 'active/foo', script_version: 'b1'
-    assert_equal('1de84a854e2b440dc53bf42f8548afa4c17da332',
-                 internal_tag(j.uuid))
-    j.update_attributes repository: 'active/foo', script_version: 'master'
-    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
-                 internal_tag(j.uuid))
-  end
-
   test 'script_parameters_digest is independent of key order' do
     j1 = Job.new(job_attrs(script_parameters: {'a' => 'a', 'ddee' => {'d' => 'd', 'e' => 'e'}}))
     j2 = Job.new(job_attrs(script_parameters: {'ddee' => {'e' => 'e', 'd' => 'd'}, 'a' => 'a'}))
@@ -562,17 +218,6 @@ class JobTest < ActiveSupport::TestCase
     assert_equal Job.deep_sort_hash(a).to_json, Job.deep_sort_hash(b).to_json
   end
 
-  test 'find_reusable without logging' do
-    Rails.logger.expects(:info).never
-    try_find_reusable
-  end
-
-  test 'find_reusable with logging' do
-    Rails.configuration.Containers.LogReuseDecisions = true
-    Rails.logger.expects(:info).at_least(3)
-    try_find_reusable
-  end
-
   def try_find_reusable
     foobar = jobs(:foobar)
     example_attrs = {
@@ -600,57 +245,10 @@ class JobTest < ActiveSupport::TestCase
     assert_equal foobar.uuid, j.uuid
   end
 
-  [
-    true,
-    false,
-  ].each do |cascade|
-    test "cancel job with cascade #{cascade}" do
-      job = Job.find_by_uuid jobs(:running_job_with_components_at_level_1).uuid
-      job.cancel cascade: cascade
-      assert_equal Job::Cancelled, job.state
-
-      descendents = ['zzzzz-8i9sb-jobcomponentsl2',
-                     'zzzzz-d1hrv-picomponentsl02',
-                     'zzzzz-8i9sb-job1atlevel3noc',
-                     'zzzzz-8i9sb-job2atlevel3noc']
-
-      jobs = Job.where(uuid: descendents)
-      jobs.each do |j|
-        assert_equal ('Cancelled' == j.state), cascade
-      end
-
-      pipelines = PipelineInstance.where(uuid: descendents)
-      pipelines.each do |pi|
-        assert_equal ('Paused' == pi.state), cascade
-      end
-    end
-  end
-
-  test 'cancelling a completed job raises error' do
-    job = Job.find_by_uuid jobs(:job_with_latest_version).uuid
-    assert job
-    assert_equal 'Complete', job.state
-
-    assert_raises(ArvadosModel::InvalidStateTransitionError) do
-      job.cancel
-    end
-  end
-
-  test 'cancelling a job with circular relationship with another does not result in an infinite loop' do
-    job = Job.find_by_uuid jobs(:running_job_2_with_circular_component_relationship).uuid
-
-    job.cancel cascade: true
-
-    assert_equal Job::Cancelled, job.state
-
-    child = Job.find_by_uuid job.components.collect{|_, uuid| uuid}[0]
-    assert_equal Job::Cancelled, child.state
-  end
-
   test 'enable legacy api configuration option = true' do
     Rails.configuration.Containers.JobsAPI.Enable = "true"
     check_enable_legacy_jobs_api
-    assert_equal({}, Rails.configuration.API.DisabledAPIs)
+    assert_equal(Disable_update_jobs_api_method_list, Rails.configuration.API.DisabledAPIs)
   end
 
   test 'enable legacy api configuration option = false' do
@@ -663,7 +261,7 @@ class JobTest < ActiveSupport::TestCase
     Rails.configuration.Containers.JobsAPI.Enable = "auto"
     assert Job.count > 0
     check_enable_legacy_jobs_api
-    assert_equal({}, Rails.configuration.API.DisabledAPIs)
+    assert_equal(Disable_update_jobs_api_method_list, Rails.configuration.API.DisabledAPIs)
   end
 
   test 'enable legacy api configuration option = auto, no jobs' do
index 8197deee0addec0be7f85e7208a68b6379270d57..614c169e85641c99bf30126428159587dc977373 100644 (file)
@@ -6,112 +6,6 @@ require 'test_helper'
 
 class PipelineInstanceTest < ActiveSupport::TestCase
 
-  test "check active and success for a pipeline in new state" do
-    pi = pipeline_instances :new_pipeline
-
-    assert_equal 'New', pi.state, 'expected state to be New for :new_pipeline'
-
-    # save the pipeline and expect state to be New
-    Thread.current[:user] = users(:admin)
-
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::New, pi.state, 'expected state to be New for new pipeline'
-  end
-
-  test "check active and success for a newly created pipeline" do
-    set_user_from_auth :active
-
-    pi = PipelineInstance.create(state: 'Ready')
-    pi.save
-
-    assert pi.valid?, 'expected newly created empty pipeline to be valid ' + pi.errors.messages.to_s
-    assert_equal 'Ready', pi.state, 'expected state to be Ready for a new empty pipeline'
-  end
-
-  test "update attributes for pipeline" do
-    Thread.current[:user] = users(:admin)
-
-    pi = pipeline_instances :new_pipeline
-
-    # add a component with no input and expect state to be New
-    component = {'script_parameters' => {"input_not_provided" => {"required" => true}}}
-    pi.components['first'] = component
-    components = pi.components
-    pi.update_attribute 'components', pi.components
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::New, pi.state, 'expected state to be New after adding component with input'
-    assert_equal pi.components.size, 1, 'expected one component'
-    assert_nil pi.started_at, 'expected started_at to be nil on new pipeline instance'
-    assert_nil pi.finished_at, 'expected finished_at to be nil on new pipeline instance'
-
-    # add a component with no input not required
-    component = {'script_parameters' => {"input_not_provided" => {"required" => false}}}
-    pi.components['first'] = component
-    components = pi.components
-    pi.update_attribute 'components', pi.components
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Ready, pi.state, 'expected state to be Ready after adding component with input'
-    assert_equal pi.components.size, 1, 'expected one component'
-
-    # add a component with input and expect state to become Ready
-    component = {'script_parameters' => {"input" => "yyyad4b39ca5a924e481008009d94e32+210"}}
-    pi.components['first'] = component
-    components = pi.components
-    pi.update_attribute 'components', pi.components
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Ready, pi.state, 'expected state to be Ready after adding component with input'
-    assert_equal pi.components.size, 1, 'expected one component'
-
-    pi.state = PipelineInstance::RunningOnServer
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::RunningOnServer, pi.state, 'expected state to be RunningOnServer after updating state to RunningOnServer'
-    assert_not_nil pi.started_at, 'expected started_at to have a value on a running pipeline instance'
-    assert_nil pi.finished_at, 'expected finished_at to be nil on a running pipeline instance'
-
-    pi.state = PipelineInstance::Paused
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Paused, pi.state, 'expected state to be Paused after updating state to Paused'
-
-    pi.state = PipelineInstance::Complete
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Complete, pi.state, 'expected state to be Complete after updating state to Complete'
-    assert_not_nil pi.started_at, 'expected started_at to have a value on a completed pipeline instance'
-    assert_not_nil pi.finished_at, 'expected finished_at to have a value on a completed pipeline instance'
-
-    pi.state = 'bogus'
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Complete, pi.state, 'expected state to be unchanged with set to a bogus value'
-
-    pi.state = PipelineInstance::Failed
-    pi.save
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::Failed, pi.state, 'expected state to be Failed after updating state to Failed'
-    assert_not_nil pi.started_at, 'expected started_at to have a value on a failed pipeline instance'
-    assert_not_nil pi.finished_at, 'expected finished_at to have a value on a failed pipeline instance'
-  end
-
-  test "update attributes for pipeline with two components" do
-    pi = pipeline_instances :new_pipeline
-
-    # add two components, one with input and one with no input and expect state to be New
-    component1 = {'script_parameters' => {"something" => "xxxad4b39ca5a924e481008009d94e32+210", "input" => "c1bad4b39ca5a924e481008009d94e32+210"}}
-    component2 = {'script_parameters' => {"something_else" => "xxxad4b39ca5a924e481008009d94e32+210", "input_missing" => {"required" => true}}}
-    pi.components['first'] = component1
-    pi.components['second'] = component2
-
-    Thread.current[:user] = users(:admin)
-    pi.update_attribute 'components', pi.components
-
-    pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
-    assert_equal PipelineInstance::New, pi.state, 'expected state to be New after adding component with input'
-    assert_equal pi.components.size, 2, 'expected two components'
-  end
-
   [:has_component_with_no_script_parameters,
    :has_component_with_empty_script_parameters].each do |pi_name|
     test "update pipeline that #{pi_name}" do
index 70e589c854c1314269eac7bc4e909449370257ad..0944a318750d995055e970d445756df94e981382 100644 (file)
@@ -780,7 +780,7 @@ class Operations(llfuse.Operations):
     @create_time.time()
     @catch_exceptions
     def create(self, inode_parent, name, mode, flags, ctx=None):
-        name = name.decode()
+        name = name.decode(encoding=self.inodes.encoding)
         _logger.debug("arv-mount create: parent_inode %i '%s' %o", inode_parent, name, mode)
 
         p = self._check_writable(inode_parent)
@@ -798,7 +798,7 @@ class Operations(llfuse.Operations):
     @mkdir_time.time()
     @catch_exceptions
     def mkdir(self, inode_parent, name, mode, ctx=None):
-        name = name.decode()
+        name = name.decode(encoding=self.inodes.encoding)
         _logger.debug("arv-mount mkdir: parent_inode %i '%s' %o", inode_parent, name, mode)
 
         p = self._check_writable(inode_parent)
@@ -813,24 +813,28 @@ class Operations(llfuse.Operations):
     @unlink_time.time()
     @catch_exceptions
     def unlink(self, inode_parent, name, ctx=None):
+        name = name.decode(encoding=self.inodes.encoding)
         _logger.debug("arv-mount unlink: parent_inode %i '%s'", inode_parent, name)
         p = self._check_writable(inode_parent)
-        p.unlink(name.decode())
+        p.unlink(name)
 
     @rmdir_time.time()
     @catch_exceptions
     def rmdir(self, inode_parent, name, ctx=None):
+        name = name.decode(encoding=self.inodes.encoding)
         _logger.debug("arv-mount rmdir: parent_inode %i '%s'", inode_parent, name)
         p = self._check_writable(inode_parent)
-        p.rmdir(name.decode())
+        p.rmdir(name)
 
     @rename_time.time()
     @catch_exceptions
     def rename(self, inode_parent_old, name_old, inode_parent_new, name_new, ctx=None):
+        name_old = name_old.decode(encoding=self.inodes.encoding)
+        name_new = name_new.decode(encoding=self.inodes.encoding)
         _logger.debug("arv-mount rename: old_parent_inode %i '%s' new_parent_inode %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
         src = self._check_writable(inode_parent_old)
         dest = self._check_writable(inode_parent_new)
-        dest.rename(name_old.decode(), name_new.decode(), src)
+        dest.rename(name_old, name_new, src)
 
     @flush_time.time()
     @catch_exceptions
index b9a1f3069f9d3e8bd03563c6785cb0b00d388582..8d1062825e85d79a1fe7e60289437c2182060b62 100644 (file)
@@ -17,13 +17,7 @@ import (
 const metricsUpdateInterval = time.Second / 10
 
 type cache struct {
-       TTL                  arvados.Duration
-       UUIDTTL              arvados.Duration
-       MaxCollectionEntries int
-       MaxCollectionBytes   int64
-       MaxPermissionEntries int
-       MaxUUIDEntries       int
-
+       config      *arvados.WebDAVCacheConfig
        registry    *prometheus.Registry
        metrics     cacheMetrics
        pdhs        *lru.TwoQueueCache
@@ -110,15 +104,15 @@ type cachedPermission struct {
 
 func (c *cache) setup() {
        var err error
-       c.pdhs, err = lru.New2Q(c.MaxUUIDEntries)
+       c.pdhs, err = lru.New2Q(c.config.MaxUUIDEntries)
        if err != nil {
                panic(err)
        }
-       c.collections, err = lru.New2Q(c.MaxCollectionEntries)
+       c.collections, err = lru.New2Q(c.config.MaxCollectionEntries)
        if err != nil {
                panic(err)
        }
-       c.permissions, err = lru.New2Q(c.MaxPermissionEntries)
+       c.permissions, err = lru.New2Q(c.config.MaxPermissionEntries)
        if err != nil {
                panic(err)
        }
@@ -164,7 +158,7 @@ func (c *cache) Update(client *arvados.Client, coll arvados.Collection, fs arvad
        })
        if err == nil {
                c.collections.Add(client.AuthToken+"\000"+coll.PortableDataHash, &cachedCollection{
-                       expire:     time.Now().Add(time.Duration(c.TTL)),
+                       expire:     time.Now().Add(time.Duration(c.config.TTL)),
                        collection: &updated,
                })
        }
@@ -221,11 +215,11 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                }
                if current.PortableDataHash == pdh {
                        c.permissions.Add(permKey, &cachedPermission{
-                               expire: time.Now().Add(time.Duration(c.TTL)),
+                               expire: time.Now().Add(time.Duration(c.config.TTL)),
                        })
                        if pdh != targetID {
                                c.pdhs.Add(targetID, &cachedPDH{
-                                       expire: time.Now().Add(time.Duration(c.UUIDTTL)),
+                                       expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
                                        pdh:    pdh,
                                })
                        }
@@ -246,19 +240,19 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
        if err != nil {
                return nil, err
        }
-       exp := time.Now().Add(time.Duration(c.TTL))
+       exp := time.Now().Add(time.Duration(c.config.TTL))
        c.permissions.Add(permKey, &cachedPermission{
                expire: exp,
        })
        c.pdhs.Add(targetID, &cachedPDH{
-               expire: time.Now().Add(time.Duration(c.UUIDTTL)),
+               expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
                pdh:    collection.PortableDataHash,
        })
        c.collections.Add(arv.ApiToken+"\000"+collection.PortableDataHash, &cachedCollection{
                expire:     exp,
                collection: collection,
        })
-       if int64(len(collection.ManifestText)) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
+       if int64(len(collection.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) {
                go c.pruneCollections()
        }
        return collection, nil
@@ -295,7 +289,7 @@ func (c *cache) pruneCollections() {
                }
        }
        for i, k := range keys {
-               if size <= c.MaxCollectionBytes {
+               if size <= c.config.MaxCollectionBytes {
                        break
                }
                if expired[i] {
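
The cache hunks above replace keep-web's per-daemon cache knobs with a pointer to the cluster-wide arvados.WebDAVCacheConfig. A minimal sketch of how the limits are supplied under the new scheme, assuming the newConfig helper introduced in main.go later in this commit (field names and the example values are taken from this diff; this is an illustration, not a complete program):

	cfg := newConfig(arvCfg) // sets cfg.Cache.config = &cfg.cluster.Collections.WebDAVCache
	cfg.cluster.Collections.WebDAVCache.MaxUUIDEntries = 1000       // sizes the pdhs LRU in setup()
	cfg.cluster.Collections.WebDAVCache.MaxCollectionEntries = 1000 // sizes the collections LRU
	cfg.cluster.Collections.WebDAVCache.MaxCollectionBytes = 100000000
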
index d6dd389278e7ae4f05faab2450680a2112fb1545..1f1f6b3bd95133f60e9466d88aa288a8d816b6e8 100644 (file)
@@ -33,7 +33,7 @@ func (s *UnitSuite) TestCache(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := DefaultConfig().Cache
+       cache := newConfig(s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        // Hit the same collection 5 times using the same token. Only
@@ -114,7 +114,7 @@ func (s *UnitSuite) TestCacheForceReloadByPDH(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := DefaultConfig().Cache
+       cache := newConfig(s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        for _, forceReload := range []bool{false, true, false, true} {
@@ -134,7 +134,7 @@ func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := DefaultConfig().Cache
+       cache := newConfig(s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        for _, forceReload := range []bool{false, true, false, true} {
index 9d9e314fcaf7e25710f1fdd341ca13c7491413f0..f3f8309d329c3150078448cf9535a7f2beac077e 100644 (file)
@@ -52,7 +52,7 @@ func (s *IntegrationSuite) TestCadaverUserProject(c *check.C) {
 }
 
 func (s *IntegrationSuite) testCadaver(c *check.C, password string, pathFunc func(arvados.Collection) (string, string, string), skip func(string) bool) {
-       s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
 
        testdata := []byte("the human tragedy consists in the necessity of living with the consequences of actions performed under the pressure of compulsions we do not understand")
 
@@ -340,7 +340,7 @@ func (s *IntegrationSuite) runCadaver(c *check.C, password, path, stdin string)
                // unauthenticated request, which it only does in
                // AttachmentOnlyHost, TrustAllContent, and
                // per-collection vhost cases.
-               s.testServer.Config.AttachmentOnlyHost = s.testServer.Addr
+               s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = s.testServer.Addr
 
                cmd.Env = append(os.Environ(), "HOME="+tempdir)
                f, err := os.OpenFile(filepath.Join(tempdir, ".netrc"), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
index d65156f98781f99cd3fbc4a20b2f0ba144ea8f97..8682eac2dd08b5aaa8f308330ca4a2eba06cf34e 100644 (file)
 //
 // Configuration
 //
-// The default configuration file location is
-// /etc/arvados/keep-web/keep-web.yml.
+// The default cluster configuration file location is
+// /etc/arvados/config.yml.
 //
 // Example configuration file
 //
-//     Client:
-//       APIHost: "zzzzz.arvadosapi.com:443"
-//       AuthToken: ""
-//       Insecure: false
-//     Listen: :1234
-//     AnonymousTokens:
-//       - xxxxxxxxxxxxxxxxxxxx
-//     AttachmentOnlyHost: ""
-//     TrustAllContent: false
+//   Clusters:
+//     zzzzz:
+//       SystemRootToken: ""
+//       Services:
+//         Controller:
+//           ExternalURL: "https://example.com"
+//           Insecure: false
+//         WebDAV:
+//           InternalURLs:
+//             "http://:1234/": {}
+//         WebDAVDownload:
+//           InternalURLs:
+//             "http://:1234/": {}
+//           ExternalURL: "https://download.example.com/"
+//       Users:
+//         AnonymousUserToken: "xxxxxxxxxxxxxxxxxxxx"
+//       Collections:
+//         TrustAllContent: false
 //
 // Starting the server
 //
 // Start a server using the default config file
-// /etc/arvados/keep-web/keep-web.yml:
+// /etc/arvados/config.yml:
 //
 //   keep-web
 //
-// Start a server using the config file /path/to/keep-web.yml:
+// Start a server using the config file /path/to/config.yml:
 //
-//   keep-web -config /path/to/keep-web.yml
+//   keep-web -config /path/to/config.yml
 //
 // Proxy configuration
 //
 //
 // Anonymous downloads
 //
-// The "AnonymousTokens" configuration entry is an array of tokens to
-// use when processing anonymous requests, i.e., whenever a web client
+// The "Users.AnonymousUserToken" configuration entry used when
+// when processing anonymous requests, i.e., whenever a web client
 // does not supply its own Arvados API token via path, query string,
 // cookie, or request header.
 //
-//   "AnonymousTokens":["xxxxxxxxxxxxxxxxxxxxxxx"]
+//   Clusters:
+//     zzzzz:
+//       Users:
+//         AnonymousUserToken: "xxxxxxxxxxxxxxxxxxxxxxx"
 //
 // See http://doc.arvados.org/install/install-keep-web.html for examples.
 //
 // only when the designated origin matches exactly the Host header
 // provided by the client or downstream proxy.
 //
-//   "AttachmentOnlyHost":"domain.example:9999"
+//   Clusters:
+//     zzzzz:
+//       Services:
+//         WebDAVDownload:
+//           ExternalURL: "https://domain.example:9999"
 //
 // Trust All Content mode
 //
 //
 // In such cases you can enable trust-all-content mode.
 //
-//   "TrustAllContent":true
+//   Clusters:
+//     zzzzz:
+//       Collections:
+//         TrustAllContent: true
 //
 // When TrustAllContent is enabled, the only effect of the
-// AttachmentOnlyHost flag is to add a "Content-Disposition:
+// Attachment-Only host setting is to add a "Content-Disposition:
 // attachment" header.
 //
-//   "AttachmentOnlyHost":"domain.example:9999",
-//   "TrustAllContent":true
+//   Clusters:
+//     zzzzz:
+//       Services:
+//         WebDAVDownload:
+//           ExternalURL: "https://domain.example:9999"
+//       Collections:
+//         TrustAllContent: true
 //
 // Depending on your site configuration, you might also want to enable
 // the "trust all content" setting in Workbench. Normally, Workbench
index 837579fe25acfbff5283b28bbb7f4375a3322280..f9e0c1a505376b76e9242ac721a09d7a37fc17ba 100644 (file)
@@ -79,9 +79,10 @@ func (h *handler) setup() {
        h.clientPool = arvadosclient.MakeClientPool()
 
        keepclient.RefreshServiceDiscoveryOnSIGHUP()
+       keepclient.DefaultBlockCache.MaxBlocks = h.Config.cluster.Collections.WebDAVCache.MaxBlockEntries
 
        h.healthHandler = &health.Handler{
-               Token:  h.Config.ManagementToken,
+               Token:  h.Config.cluster.ManagementToken,
                Prefix: "/_health/",
        }
 
@@ -249,9 +250,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        var pathToken bool
        var attachment bool
        var useSiteFS bool
-       credentialsOK := h.Config.TrustAllContent
+       credentialsOK := h.Config.cluster.Collections.TrustAllContent
 
-       if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
+       if r.Host != "" && r.Host == h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host {
                credentialsOK = true
                attachment = true
        } else if r.FormValue("disposition") == "attachment" {
@@ -351,7 +352,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 
        if tokens == nil {
-               tokens = append(reqTokens, h.Config.AnonymousTokens...)
+               tokens = append(reqTokens, h.Config.cluster.Users.AnonymousUserToken)
        }
 
        if len(targetPath) > 0 && targetPath[0] == "_" {
index dd91df354900175592501ce794a6d9dc46cf8f41..1090151e1ae3fd872da91e6badc3ff277008eba8 100644 (file)
@@ -17,18 +17,48 @@ import (
        "regexp"
        "strings"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        check "gopkg.in/check.v1"
 )
 
 var _ = check.Suite(&UnitSuite{})
 
-type UnitSuite struct{}
+type UnitSuite struct {
+       Config *arvados.Config
+}
+
+func (s *UnitSuite) SetUpTest(c *check.C) {
+       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+       ldr.Path = "-"
+       cfg, err := ldr.Load()
+       c.Assert(err, check.IsNil)
+       s.Config = cfg
+}
+
+func (s *UnitSuite) TestKeepClientBlockCache(c *check.C) {
+       cfg := newConfig(s.Config)
+       cfg.cluster.Collections.WebDAVCache.MaxBlockEntries = 42
+       h := handler{Config: cfg}
+       c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), cfg.cluster.Collections.WebDAVCache.MaxBlockEntries)
+       u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
+       req := &http.Request{
+               Method:     "GET",
+               Host:       u.Host,
+               URL:        u,
+               RequestURI: u.RequestURI(),
+       }
+       resp := httptest.NewRecorder()
+       h.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, cfg.cluster.Collections.WebDAVCache.MaxBlockEntries)
+}
 
 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
-       h := handler{Config: DefaultConfig()}
+       h := handler{Config: newConfig(s.Config)}
        u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
        req := &http.Request{
                Method:     "OPTIONS",
@@ -78,8 +108,8 @@ func (s *UnitSuite) TestInvalidUUID(c *check.C) {
                        RequestURI: u.RequestURI(),
                }
                resp := httptest.NewRecorder()
-               cfg := DefaultConfig()
-               cfg.AnonymousTokens = []string{arvadostest.AnonymousToken}
+               cfg := newConfig(s.Config)
+               cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
                h := handler{Config: cfg}
                h.ServeHTTP(resp, req)
                c.Check(resp.Code, check.Equals, http.StatusNotFound)
@@ -338,7 +368,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenRequestAttachment(c *check
 }
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
        resp := s.testVhostRedirectTokenToCookie(c, "GET",
                "download.example.com/by_id/"+arvadostest.FooCollection+"/foo",
                "?api_token="+arvadostest.ActiveToken,
@@ -351,7 +381,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
        resp := s.testVhostRedirectTokenToCookie(c, "GET",
                "download.example.com/c="+arvadostest.WazVersion1Collection+"/waz",
                "?api_token="+arvadostest.ActiveToken,
@@ -373,7 +403,7 @@ func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
-       s.testServer.Config.TrustAllContent = true
+       s.testServer.Config.cluster.Collections.TrustAllContent = true
        s.testVhostRedirectTokenToCookie(c, "GET",
                "example.com/c="+arvadostest.FooCollection+"/foo",
                "?api_token="+arvadostest.ActiveToken,
@@ -385,7 +415,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 }
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = "example.com:1234"
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com:1234"
 
        s.testVhostRedirectTokenToCookie(c, "GET",
                "example.com/c="+arvadostest.FooCollection+"/foo",
@@ -430,7 +460,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C)
 }
 
 func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
-       s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        s.testVhostRedirectTokenToCookie(c, "GET",
                "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
                "",
@@ -442,7 +472,7 @@ func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
-       s.testServer.Config.AnonymousTokens = []string{"anonymousTokenConfiguredButInvalid"}
+       s.testServer.Config.cluster.Users.AnonymousUserToken = "anonymousTokenConfiguredButInvalid"
        s.testVhostRedirectTokenToCookie(c, "GET",
                "example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
                "",
@@ -454,7 +484,7 @@ func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
 
        client := s.testServer.Config.Client
        client.AuthToken = arvadostest.ActiveToken
@@ -560,17 +590,17 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 }
 
 func (s *IntegrationSuite) TestDirectoryListingWithAnonymousToken(c *check.C) {
-       s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        s.testDirectoryListing(c)
 }
 
 func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C) {
-       s.testServer.Config.AnonymousTokens = nil
+       s.testServer.Config.cluster.Users.AnonymousUserToken = ""
        s.testDirectoryListing(c)
 }
 
 func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
        authHeader := http.Header{
                "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
        }
@@ -707,7 +737,7 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
                        cutDirs: 2,
                },
        } {
-               c.Logf("HTML: %q => %q", trial.uri, trial.expect)
+               comment := check.Commentf("HTML: %q => %q", trial.uri, trial.expect)
                resp := httptest.NewRecorder()
                u := mustParseURL("//" + trial.uri)
                req := &http.Request{
@@ -736,19 +766,19 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
                        s.testServer.Handler.ServeHTTP(resp, req)
                }
                if trial.redirect != "" {
-                       c.Check(req.URL.Path, check.Equals, trial.redirect)
+                       c.Check(req.URL.Path, check.Equals, trial.redirect, comment)
                }
                if trial.expect == nil {
-                       c.Check(resp.Code, check.Equals, http.StatusNotFound)
+                       c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
                } else {
-                       c.Check(resp.Code, check.Equals, http.StatusOK)
+                       c.Check(resp.Code, check.Equals, http.StatusOK, comment)
                        for _, e := range trial.expect {
-                               c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`)
+                               c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`, comment)
                        }
-                       c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`)
+                       c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`, comment)
                }
 
-               c.Logf("WebDAV: %q => %q", trial.uri, trial.expect)
+               comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
                req = &http.Request{
                        Method:     "OPTIONS",
                        Host:       u.Host,
@@ -760,9 +790,9 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
                resp = httptest.NewRecorder()
                s.testServer.Handler.ServeHTTP(resp, req)
                if trial.expect == nil {
-                       c.Check(resp.Code, check.Equals, http.StatusNotFound)
+                       c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
                } else {
-                       c.Check(resp.Code, check.Equals, http.StatusOK)
+                       c.Check(resp.Code, check.Equals, http.StatusOK, comment)
                }
 
                req = &http.Request{
@@ -776,11 +806,11 @@ func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
                resp = httptest.NewRecorder()
                s.testServer.Handler.ServeHTTP(resp, req)
                if trial.expect == nil {
-                       c.Check(resp.Code, check.Equals, http.StatusNotFound)
+                       c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
                } else {
-                       c.Check(resp.Code, check.Equals, http.StatusMultiStatus)
+                       c.Check(resp.Code, check.Equals, http.StatusMultiStatus, comment)
                        for _, e := range trial.expect {
-                               c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+filepath.Join(u.Path, e)+`</D:href>.*`)
+                               c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+filepath.Join(u.Path, e)+`</D:href>.*`, comment)
                        }
                }
        }
@@ -802,7 +832,7 @@ func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
 
        var updated arvados.Collection
        for _, fnm := range []string{"foo.txt", "bar.txt"} {
-               s.testServer.Config.AttachmentOnlyHost = "example.com"
+               s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com"
                u, _ := url.Parse("http://example.com/c=" + newCollection.UUID + "/" + fnm)
                req := &http.Request{
                        Method:     "DELETE",
@@ -827,7 +857,7 @@ func (s *IntegrationSuite) TestDeleteLastFile(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestHealthCheckPing(c *check.C) {
-       s.testServer.Config.ManagementToken = arvadostest.ManagementToken
+       s.testServer.Config.cluster.ManagementToken = arvadostest.ManagementToken
        authHeader := http.Header{
                "Authorization": {"Bearer " + arvadostest.ManagementToken},
        }
index 1931256209eb4f211d031114c8297fb0ee53d01f..23a2c659e0db7349af1a2c7221770523ebac6a2b 100644 (file)
@@ -6,7 +6,6 @@
 Description=Arvados Keep web gateway
 Documentation=https://doc.arvados.org/
 After=network.target
-AssertPathExists=/etc/arvados/keep-web/keep-web.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0
index 018b5a2e817a19075fa5220ef8c87d1541608ed5..0f2cf1237d82a649d5170dc673069b9828de3a05 100644 (file)
@@ -8,52 +8,35 @@ import (
        "flag"
        "fmt"
        "os"
-       "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "git.curoverse.com/arvados.git/sdk/go/config"
        "github.com/coreos/go-systemd/daemon"
+       "github.com/ghodss/yaml"
        log "github.com/sirupsen/logrus"
 )
 
 var (
-       defaultConfigPath = "/etc/arvados/keep-web/keep-web.yml"
-       version           = "dev"
+       version = "dev"
 )
 
 // Config specifies server configuration.
 type Config struct {
-       Client arvados.Client
-
-       Listen string
-
-       AnonymousTokens    []string
-       AttachmentOnlyHost string
-       TrustAllContent    bool
-
-       Cache cache
-
-       // Hack to support old command line flag, which is a bool
-       // meaning "get actual token from environment".
-       deprecatedAllowAnonymous bool
-
-       //Authorization token to be included in all health check requests.
-       ManagementToken string
+       Client  arvados.Client
+       Cache   cache
+       cluster *arvados.Cluster
 }
 
-// DefaultConfig returns the default configuration.
-func DefaultConfig() *Config {
-       return &Config{
-               Listen: ":80",
-               Cache: cache{
-                       TTL:                  arvados.Duration(5 * time.Minute),
-                       UUIDTTL:              arvados.Duration(5 * time.Second),
-                       MaxCollectionEntries: 1000,
-                       MaxCollectionBytes:   100000000,
-                       MaxPermissionEntries: 1000,
-                       MaxUUIDEntries:       1000,
-               },
+func newConfig(arvCfg *arvados.Config) *Config {
+       cfg := Config{}
+       var cls *arvados.Cluster
+       var err error
+       if cls, err = arvCfg.GetCluster(""); err != nil {
+               log.Fatal(err)
        }
+       cfg.cluster = cls
+       cfg.Cache.config = &cfg.cluster.Collections.WebDAVCache
+       return &cfg
 }
 
 func init() {
@@ -71,57 +54,57 @@ func init() {
        })
 }
 
-func main() {
-       cfg := DefaultConfig()
-
-       var configPath string
-       deprecated := " (DEPRECATED -- use config file instead)"
-       flag.StringVar(&configPath, "config", defaultConfigPath,
-               "`path` to JSON or YAML configuration file")
-       flag.StringVar(&cfg.Listen, "listen", "",
-               "address:port or :port to listen on"+deprecated)
-       flag.BoolVar(&cfg.deprecatedAllowAnonymous, "allow-anonymous", false,
-               "Load an anonymous token from the ARVADOS_API_TOKEN environment variable"+deprecated)
-       flag.StringVar(&cfg.AttachmentOnlyHost, "attachment-only-host", "",
-               "Only serve attachments at the given `host:port`"+deprecated)
-       flag.BoolVar(&cfg.TrustAllContent, "trust-all-content", false,
-               "Serve non-public content from a single origin. Dangerous: read docs before using!"+deprecated)
-       flag.StringVar(&cfg.ManagementToken, "management-token", "",
-               "Authorization token to be included in all health check requests.")
-
-       dumpConfig := flag.Bool("dump-config", false,
+func configure(logger log.FieldLogger, args []string) *Config {
+       flags := flag.NewFlagSet(args[0], flag.ExitOnError)
+
+       loader := config.NewLoader(os.Stdin, logger)
+       loader.SetupFlags(flags)
+
+       dumpConfig := flags.Bool("dump-config", false,
                "write current configuration to stdout and exit")
-       getVersion := flag.Bool("version", false,
+       getVersion := flags.Bool("version", false,
                "print version information and exit.")
-       flag.Usage = usage
-       flag.Parse()
+
+       args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepweb-config")
+       flags.Parse(args)
 
        // Print version information if requested
        if *getVersion {
                fmt.Printf("keep-web %s\n", version)
-               return
+               return nil
        }
 
-       if err := config.LoadFile(cfg, configPath); err != nil {
-               if h := os.Getenv("ARVADOS_API_HOST"); h != "" && configPath == defaultConfigPath {
-                       log.Printf("DEPRECATED: Using ARVADOS_API_HOST environment variable. Use config file instead.")
-                       cfg.Client.APIHost = h
-               } else {
+       arvCfg, err := loader.Load()
+       if err != nil {
+               log.Fatal(err)
+       }
+       cfg := newConfig(arvCfg)
+
+       if *dumpConfig {
+               out, err := yaml.Marshal(cfg)
+               if err != nil {
                        log.Fatal(err)
                }
+               _, err = os.Stdout.Write(out)
+               if err != nil {
+                       log.Fatal(err)
+               }
+               return nil
        }
-       if cfg.deprecatedAllowAnonymous {
-               log.Printf("DEPRECATED: Using -allow-anonymous command line flag with ARVADOS_API_TOKEN environment variable. Use config file instead.")
-               cfg.AnonymousTokens = []string{os.Getenv("ARVADOS_API_TOKEN")}
-       }
+       return cfg
+}
 
-       if *dumpConfig {
-               log.Fatal(config.DumpAndExit(cfg))
+func main() {
+       logger := log.New()
+
+       cfg := configure(logger, os.Args)
+       if cfg == nil {
+               return
        }
 
        log.Printf("keep-web %s started", version)
 
-       os.Setenv("ARVADOS_API_HOST", cfg.Client.APIHost)
+       os.Setenv("ARVADOS_API_HOST", cfg.cluster.Services.Controller.ExternalURL.Host)
        srv := &server{Config: cfg}
        if err := srv.Start(); err != nil {
                log.Fatal(err)
index 167fbbe5b85cf93f012d072e1fd97af3f5bd7106..b81c25175371dde297fcccc8a281fee0af9314d9 100644 (file)
@@ -8,6 +8,7 @@ import (
        "context"
        "net/http"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/prometheus/client_golang/prometheus"
@@ -25,8 +26,15 @@ func (srv *server) Start() error {
        h.Config.Cache.registry = reg
        ctx := ctxlog.Context(context.Background(), logrus.StandardLogger())
        mh := httpserver.Instrument(reg, nil, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
-       h.MetricsAPI = mh.ServeAPI(h.Config.ManagementToken, http.NotFoundHandler())
+       h.MetricsAPI = mh.ServeAPI(h.Config.cluster.ManagementToken, http.NotFoundHandler())
        srv.Handler = mh
-       srv.Addr = srv.Config.Listen
+       var listen arvados.URL
+       for listen = range srv.Config.cluster.Services.WebDAV.InternalURLs {
+               break
+       }
+       if len(srv.Config.cluster.Services.WebDAV.InternalURLs) > 1 {
+               logrus.Warn("Services.WebDAV.InternalURLs has more than one key; picked: ", listen)
+       }
+       srv.Addr = listen.Host
        return srv.Server.Start()
 }
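
The listen-address logic above relies on a Go idiom: ranging over a map and
breaking after the first iteration picks an arbitrary entry, which is why the
hunk warns when Services.WebDAV.InternalURLs has more than one key. A minimal,
self-contained sketch of that pattern (plain strings stand in for the arvados
URL and ServiceInstance types used in the real code):

    package main

    import "fmt"

    func main() {
            // Stand-in for Services.WebDAV.InternalURLs: keys are listen addresses.
            internalURLs := map[string]struct{}{
                    "localhost:9002": {},
            }
            var listen string
            for listen = range internalURLs {
                    break // map iteration order is unspecified; any key may be chosen
            }
            if len(internalURLs) > 1 {
                    fmt.Println("more than one InternalURL configured; picked:", listen)
            }
            fmt.Println("listening on", listen)
    }
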
index 0263dcf08f92c906032664c8b0d3b6de8726d9b7..b856090cac4e6557814e040d74a0c87b02e38904 100644 (file)
@@ -5,6 +5,7 @@
 package main
 
 import (
+       "bytes"
        "crypto/md5"
        "encoding/json"
        "fmt"
@@ -17,6 +18,7 @@ import (
        "strings"
        "testing"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
@@ -148,7 +150,7 @@ type curlCase struct {
 }
 
 func (s *IntegrationSuite) Test200(c *check.C) {
-       s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+       s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        for _, spec := range []curlCase{
                // My collection
                {
@@ -298,7 +300,7 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...
 }
 
 func (s *IntegrationSuite) TestMetrics(c *check.C) {
-       s.testServer.Config.AttachmentOnlyHost = s.testServer.Addr
+       s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = s.testServer.Addr
        origin := "http://" + s.testServer.Addr
        req, _ := http.NewRequest("GET", origin+"/notfound", nil)
        _, err := http.DefaultClient.Do(req)
@@ -427,15 +429,23 @@ func (s *IntegrationSuite) TearDownSuite(c *check.C) {
 
 func (s *IntegrationSuite) SetUpTest(c *check.C) {
        arvadostest.ResetEnv()
-       cfg := DefaultConfig()
+       ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+       ldr.Path = "-"
+       arvCfg, err := ldr.Load()
+       c.Assert(err, check.IsNil)
+       cfg := newConfig(arvCfg)
        cfg.Client = arvados.Client{
                APIHost:  testAPIHost,
                Insecure: true,
        }
-       cfg.Listen = "127.0.0.1:0"
-       cfg.ManagementToken = arvadostest.ManagementToken
+       listen := "127.0.0.1:0"
+       cfg.cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: listen}] = arvados.ServiceInstance{}
+       cfg.cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: listen}] = arvados.ServiceInstance{}
+       cfg.cluster.ManagementToken = arvadostest.ManagementToken
+       cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        s.testServer = &server{Config: cfg}
-       err := s.testServer.Start()
+       err = s.testServer.Start()
        c.Assert(err, check.Equals, nil)
 }
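
The SetUpTest hunk above shows the new way a test obtains a throwaway
configuration: a one-line YAML document is fed to the lib/config loader
instead of assembling a Config struct by hand. A minimal standalone sketch of
the same idea -- assuming only the loader calls already used above plus the
SDK's Clusters map -- might look like:

    package main

    import (
            "bytes"
            "log"

            "git.curoverse.com/arvados.git/lib/config"
    )

    func main() {
            // Feed a minimal single-cluster YAML document to the loader instead of
            // reading a configuration file from disk.
            ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
            ldr.Path = "-" // "-" makes the loader read from the supplied reader
            cfg, err := ldr.Load()
            if err != nil {
                    log.Fatal(err)
            }
            log.Printf("loaded %d cluster(s)", len(cfg.Clusters))
    }
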
 
index 62db198dd9b9ef27618b9bfd04262b32ac2736f0..08e94456eb16b445111d87ec8bb897f90062d370 100644 (file)
@@ -15,7 +15,7 @@ import (
 )
 
 func (s *UnitSuite) TestStatus(c *check.C) {
-       h := handler{Config: DefaultConfig()}
+       h := handler{Config: newConfig(s.Config)}
        u, _ := url.Parse("http://keep-web.example/status.json")
        req := &http.Request{
                Method:     "GET",
diff --git a/services/keep-web/usage.go b/services/keep-web/usage.go
deleted file mode 100644 (file)
index 705955b..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "encoding/json"
-       "flag"
-       "fmt"
-       "os"
-)
-
-func usage() {
-       c := DefaultConfig()
-       c.AnonymousTokens = []string{"xxxxxxxxxxxxxxxxxxxxxxx"}
-       c.Client.APIHost = "zzzzz.arvadosapi.com:443"
-       exampleConfigFile, err := json.MarshalIndent(c, "    ", "  ")
-       if err != nil {
-               panic(err)
-       }
-       fmt.Fprintf(os.Stderr, `
-
-Keep-web provides read-only HTTP access to files stored in Keep; see
-https://godoc.org/github.com/curoverse/arvados/services/keep-web and
-http://doc.arvados.org/install/install-keep-web.html
-
-Usage: keep-web -config path/to/keep-web.yml
-
-Options:
-`)
-       flag.PrintDefaults()
-       fmt.Fprintf(os.Stderr, `
-Example config file:
-    %s
-
-Client.APIHost:
-
-    Address (or address:port) of the Arvados API endpoint.
-
-Client.AuthToken:
-
-    Unused. Normally empty, or omitted entirely.
-
-Client.Insecure:
-
-    True if your Arvados API endpoint uses an unverifiable SSL/TLS
-    certificate.
-
-Listen:
-
-    Local port to listen on. Can be "address", "address:port", or
-    ":port", where "address" is a host IP address or name and "port"
-    is a port number or name.
-
-AnonymousTokens:
-
-    Array of tokens to try when a client does not provide a token.
-
-AttachmentOnlyHost:
-
-    Accept credentials, and add "Content-Disposition: attachment"
-    response headers, for requests at this hostname:port.
-
-    This prohibits inline display, which makes it possible to serve
-    untrusted and non-public content from a single origin, i.e.,
-    without wildcard DNS or SSL.
-
-TrustAllContent:
-
-    Serve non-public content from a single origin. Dangerous: read
-    docs before using!
-
-Cache.TTL:
-
-    Maximum time to cache manifests and permission checks.
-
-Cache.UUIDTTL:
-
-    Maximum time to cache collection state.
-
-Cache.MaxCollectionEntries:
-
-    Maximum number of collection cache entries.
-
-Cache.MaxCollectionBytes:
-
-    Approximate memory limit for collection cache.
-
-Cache.MaxPermissionEntries:
-
-    Maximum number of permission cache entries.
-
-Cache.MaxUUIDEntries:
-
-    Maximum number of UUID cache entries.
-
-`, exampleConfigFile)
-}
index ddbbcb12af4ff9b6362e2d381fc243c570985885..e8b3e60910189b11b97d9c42ae073cef082c3b63 100644 (file)
@@ -104,7 +104,7 @@ RUN echo arvados_version is git commit $arvados_version
 
 ADD fuse.conf /etc/
 
-ADD crunch-setup.sh gitolite.rc \
+ADD gitolite.rc \
     keep-setup.sh common.sh createusers.sh \
     logger runsu.sh waitforpostgres.sh \
     yml_override.py api-setup.sh \
index 3444e61e1728b166f6b04c37c5d55ee75940691f..34a0c2d75221b0466c23870ead2a996b929987da 100755 (executable)
@@ -70,6 +70,9 @@ Clusters:
   ${uuid_prefix}:
     ManagementToken: $management_token
     Services:
+      RailsAPI:
+        InternalURLs:
+          "http://localhost:${services[api]}": {}
       Workbench1:
         ExternalURL: "https://$localip:${services[workbench]}"
       Workbench2:
@@ -82,22 +85,35 @@ Clusters:
         ExternalURL: "http://$localip:${services[keepproxy-ssl]}/"
       Websocket:
         ExternalURL: "wss://$localip:${services[websockets-ssl]}/websocket"
+        InternalURLs:
+          "http://localhost:${services[websockets]}": {}
       GitSSH:
         ExternalURL: "ssh://git@$localip:"
       GitHTTP:
         ExternalURL: "http://$localip:${services[arv-git-httpd]}/"
       WebDAV:
+        InternalURLs:
+          "http://localhost:${services[keep-web]}/": {}
+        ExternalURL: "https://$localip:${services[keep-web-ssl]}/"
+      WebDAVDownload:
+        InternalURLs:
+          "http://localhost:${services[keep-web]}/": {}
         ExternalURL: "https://$localip:${services[keep-web-ssl]}/"
       Composer:
         ExternalURL: "http://$localip:${services[composer]}"
       Controller:
         ExternalURL: "https://$localip:${services[controller-ssl]}"
-    NodeProfiles:  # to be deprecated in favor of "Services" section
-      "*":
-        arvados-controller:
-          Listen: ":${services[controller]}" # choose a port
-        arvados-api-server:
-          Listen: ":${services[api]}" # must match Rails server port in your Nginx config
+        InternalURLs:
+          "http://localhost:${services[controller]}": {}
+      Keepproxy:
+        ExternalURL: "https://$localip:${services[keepproxy-ssl]}"
+        InternalURLs:
+          "http://localhost:${services[keepproxy]}": {}
     PostgreSQL:
       ConnectionPool: 32 # max concurrent connections per arvados server daemon
       Connection:
@@ -113,6 +129,7 @@ Clusters:
     Collections:
       BlobSigningKey: $blob_signing_key
       DefaultReplication: 1
+      TrustAllContent: true
     Login:
       ProviderAppSecret: $sso_app_secret
       ProviderAppID: arvados-server
@@ -122,6 +139,7 @@ Clusters:
       AutoSetupNewUsers: true
       AutoSetupNewUsersWithVmUUID: $vm_uuid
       AutoSetupNewUsersWithRepository: true
+      AnonymousUserToken: $(cat /var/lib/arvados/superuser_token)
     Workbench:
       SecretKeyBase: $workbench_secret_key_base
       ArvadosDocsite: http://$localip:${services[doc]}/
diff --git a/tools/arvbox/lib/arvbox/docker/crunch-setup.sh b/tools/arvbox/lib/arvbox/docker/crunch-setup.sh
deleted file mode 100755 (executable)
index a36e589..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-exec 2>&1
-set -eux -o pipefail
-
-. /usr/local/lib/arvbox/common.sh
-. /usr/local/lib/arvbox/go-setup.sh
-
-flock /var/lib/gopath/gopath.lock go get -t "git.curoverse.com/arvados.git/services/crunchstat"
-flock /var/lib/gopath/gopath.lock go get -t "git.curoverse.com/arvados.git/sdk/go/crunchrunner"
-install $GOPATH/bin/crunchstat $GOPATH/bin/crunchrunner /usr/local/bin
-
-if test -s /var/lib/arvados/api_rails_env ; then
-  RAILS_ENV=$(cat /var/lib/arvados/api_rails_env)
-else
-  RAILS_ENV=development
-fi
-
-export ARVADOS_API_HOST=$localip:${services[controller-ssl]}
-export ARVADOS_API_HOST_INSECURE=1
-export ARVADOS_API_TOKEN=$(cat /usr/src/arvados/services/api/superuser_token)
-export CRUNCH_JOB_BIN=/usr/src/arvados/sdk/cli/bin/crunch-job
-export PERLLIB=/usr/src/arvados/sdk/perl/lib
-export CRUNCH_TMP=/tmp/$1
-export CRUNCH_DISPATCH_LOCKFILE=/var/lock/$1-dispatch
-export CRUNCH_JOB_DOCKER_BIN=docker
-export HOME=/tmp/$1
-export CRUNCH_JOB_DOCKER_RUN_ARGS=--net=host
-# Stop excessive stat of /etc/localtime
-export TZ='America/New_York'
-
-cd /usr/src/arvados/services/api
-if test "$1" = "crunch0" ; then
-    exec bundle exec ./script/crunch-dispatch.rb $RAILS_ENV --jobs --pipelines
-else
-    exec bundle exec ./script/crunch-dispatch.rb $RAILS_ENV --jobs
-fi
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run
deleted file mode 120000 (symlink)
index d6aef4a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run
deleted file mode 120000 (symlink)
index a388c8b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service
deleted file mode 100755 (executable)
index 2b482ec..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-exec /usr/local/lib/arvbox/crunch-setup.sh crunch0
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run
deleted file mode 120000 (symlink)
index d6aef4a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run
deleted file mode 120000 (symlink)
index a388c8b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service
deleted file mode 100755 (executable)
index 0407fb8..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-sleep 1
-exec /usr/local/lib/arvbox/crunch-setup.sh crunch1
index b539b6ae1eb5405d88e6e65044a73a34c548b721..a38b633a8bc37bbe6ce81fbf30754a053050b848 100755 (executable)
@@ -16,8 +16,4 @@ if test "$1" = "--only-deps" ; then
     exit
 fi
 
-export ARVADOS_API_HOST=$localip:${services[controller-ssl]}
-export ARVADOS_API_HOST_INSECURE=1
-export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
-
-exec /usr/local/bin/keep-web -trust-all-content -listen=:${services[keep-web]}
+exec /usr/local/bin/keep-web
index 55c647d5ba0ebe2f7b9db0f3554f4d9dade74da6..96457a6e5931a6dffa0378a09333b2c4edd53698 100755 (executable)
@@ -25,8 +25,8 @@ set +e
 read -rd $'\000' keepservice <<EOF
 {
  "service_host":"$localip",
- "service_port":${services[keepproxy]},
- "service_ssl_flag":false,
+ "service_port":${services[keepproxy-ssl]},
+ "service_ssl_flag":true,
  "service_type":"proxy"
 }
 EOF
index b17eeea04096da3d69fb892c83e68f7819972d09..04a1b539526f31547011d02d4db18ae508434883 100755 (executable)
@@ -114,6 +114,7 @@ server {
     server_name keep-web;
     ssl_certificate "${server_cert}";
     ssl_certificate_key "${server_cert_key}";
+    client_max_body_size 0;
     location  / {
       proxy_pass http://keep-web;
       proxy_set_header Host \$http_host;
@@ -123,6 +124,7 @@ server {
     }
   }
 
+
   upstream keepproxy {
     server localhost:${services[keepproxy]};
   }
@@ -131,6 +133,7 @@ server {
     server_name keepproxy;
     ssl_certificate "${server_cert}";
     ssl_certificate_key "${server_cert_key}";
+    client_max_body_size 128M;
     location  / {
       proxy_pass http://keepproxy;
       proxy_set_header Host \$http_host;
index aadc775823caf136c7f7094a0d2b55fcb50f4478..ec7acb8083928f6f35f2af7835ee92f8a4a895dc 100644 (file)
@@ -17,14 +17,20 @@ class ArgumentParser(argparse.ArgumentParser):
             description='Summarize resource usage of an Arvados Crunch job')
         src = self.add_mutually_exclusive_group()
         src.add_argument(
-            '--job', '--container', '--container-request',
+            '--job', '--container-request',
             type=str, metavar='UUID',
-            help='Look up the specified job, container, or container request '
+            help='Look up the specified job or container request '
+            'and read its log data from Keep (or from the Arvados event log, '
+            'if the job is still running)')
+        src.add_argument(
+            '--container',
+            type=str, metavar='UUID',
+            help='[Deprecated] Look up the specified container, find its container request, '
             'and read its log data from Keep (or from the Arvados event log, '
             'if the job is still running)')
         src.add_argument(
             '--pipeline-instance', type=str, metavar='UUID',
-            help='Summarize each component of the given pipeline instance')
+            help='[Deprecated] Summarize each component of the given pipeline instance (historical, pre-1.4 only)')
         src.add_argument(
             '--log-file', type=str,
             help='Read log data from a regular file')
@@ -81,6 +87,8 @@ class Command(object):
             self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
         elif self.args.job:
             self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
+        elif self.args.container:
+            self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
         elif self.args.log_file:
             if self.args.log_file.endswith('.gz'):
                 fh = UTF8Decode(gzip.open(self.args.log_file))
index e962ced31404bfe26a7da36d034871941663ef23..d99d3c1cf8324c4dd33f9bcdd72a15fa4f015d7c 100644 (file)
@@ -470,7 +470,7 @@ class Summarizer(object):
         elif self.detected_crunch1:
             return JobSummarizer.runtime_constraint_mem_unit
         else:
-            return ContainerSummarizer.runtime_constraint_mem_unit
+            return ContainerRequestSummarizer.runtime_constraint_mem_unit
 
     def _map_runtime_constraint(self, key):
         if hasattr(self, 'map_runtime_constraint'):
@@ -501,12 +501,15 @@ def NewSummarizer(process_or_uuid, **kwargs):
 
     if '-dz642-' in uuid:
         if process is None:
-            process = arv.containers().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+            # Get an associated container request; it doesn't matter which
+            # one, since they all share the same log.
+            crs = arv.container_requests().list(filters=[['container_uuid', '=', uuid]], limit=1).execute()['items']
+            if len(crs) > 0:
+                process = crs[0]
+        klass = ContainerRequestTreeSummarizer
     elif '-xvhdp-' in uuid:
         if process is None:
             process = arv.container_requests().get(uuid=uuid).execute()
-        klass = ContainerTreeSummarizer
+        klass = ContainerRequestTreeSummarizer
     elif '-8i9sb-' in uuid:
         if process is None:
             process = arv.jobs().get(uuid=uuid).execute()
@@ -530,9 +533,14 @@ class ProcessSummarizer(Summarizer):
         self.process = process
         if label is None:
             label = self.process.get('name', self.process['uuid'])
-        if self.process.get('log'):
+        # Pre-Arvados-1.4, everything is in 'log'.
+        # From 1.4 on, containers have no logs, and container requests carry
+        # the log collection UUID in 'log_uuid' rather than 'log'.
+        log_collection = self.process.get('log')
+        if not log_collection:
+            log_collection = self.process.get('log_uuid')
+        if log_collection:
             try:
-                rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
+                rdr = crunchstat_summary.reader.CollectionReader(log_collection)
             except arvados.errors.NotFoundError as e:
                 logger.warning("Trying event logs after failing to read "
                                "log collection %s: %s", self.process['log'], e)
@@ -552,7 +560,7 @@ class JobSummarizer(ProcessSummarizer):
     }
 
 
-class ContainerSummarizer(ProcessSummarizer):
+class ContainerRequestSummarizer(ProcessSummarizer):
     runtime_constraint_mem_unit = 1
 
 
@@ -653,7 +661,7 @@ class PipelineSummarizer(MultiSummarizer):
             **kwargs)
 
 
-class ContainerTreeSummarizer(MultiSummarizer):
+class ContainerRequestTreeSummarizer(MultiSummarizer):
     def __init__(self, root, skip_child_jobs=False, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
 
@@ -666,10 +674,8 @@ class ContainerTreeSummarizer(MultiSummarizer):
             current = todo.popleft()
             label = current['name']
             sort_key = current['created_at']
-            if current['uuid'].find('-xvhdp-') > 0:
-                current = arv.containers().get(uuid=current['container_uuid']).execute()
 
-            summer = ContainerSummarizer(current, label=label, **kwargs)
+            summer = ContainerRequestSummarizer(current, label=label, **kwargs)
             summer.sort_key = sort_key
             children[current['uuid']] = summer
 
@@ -678,7 +684,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
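+                # "current" is a container request, so its children are the
+                # requests submitted by its own container: match on
+                # requesting_container_uuid == current's container_uuid.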
                 child_crs = arv.container_requests().index(
                     order=['uuid asc'],
                     filters=page_filters+[
-                        ['requesting_container_uuid', '=', current['uuid']]],
+                        ['requesting_container_uuid', '=', current['container_uuid']]],
                 ).execute()
                 if not child_crs['items']:
                     break
@@ -696,7 +702,7 @@ class ContainerTreeSummarizer(MultiSummarizer):
         sorted_children = collections.OrderedDict()
         for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
             sorted_children[uuid] = children[uuid]
-        super(ContainerTreeSummarizer, self).__init__(
+        super(ContainerRequestTreeSummarizer, self).__init__(
             children=sorted_children,
             label=root['name'],
             **kwargs)
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz
deleted file mode 100644 (file)
index ff7dd30..0000000
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz
new file mode 100644 (file)
index 0000000..3fe0220
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz differ
similarity index 66%
rename from tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz
rename to tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz
index 249ad22e47157aa24a7a42304533676b150ce652..fc01ce9a8f124e2fe3d88ef20394e966700b2326 100644 (file)
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz and b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report b/tools/crunchstat-summary/tests/container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y.txt.gz.report
new file mode 100644 (file)
index 0000000..5152e57
--- /dev/null
@@ -0,0 +1,39 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+# Temp disk utilization 1.21%
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
+#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
index 0270eaaec06d7fa521e8279022035b6a3bf5bd01..fb23eab39e9072f9b44ac5e3b766d25c524e5668 100644 (file)
@@ -67,29 +67,37 @@ class SummarizeEdgeCases(unittest.TestCase):
         s.run()
 
 
-class SummarizeContainer(ReportDiff):
+class SummarizeContainerCommon(ReportDiff):
     fake_container = {
         'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
         'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
     }
     fake_request = {
-        'uuid': '9tee4-xvhdp-uper95jktm10d3w',
+        'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y',
         'name': 'container',
         'created_at': '2017-08-18T14:27:25.242339223Z',
         'container_uuid': fake_container['uuid'],
-    }
-    reportfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
+        'runtime_constraints': {
+            'vcpus': 1,
+            'ram': 2621440000,
+        },
+        'log_uuid': '9tee4-4zz18-m2swj50nk0r8b6y',
+    }
+
     logfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz')
     arvmountlog = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
+        TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
-    def test_container(self, mock_api, mock_cr):
-        mock_api().container_requests().index().execute.return_value = {'items':[]}
+    def check_common(self, mock_api, mock_cr):
+        items = [{'items': [self.fake_request]}] + [{'items': []}] * 100
+        # Index and list mean the same thing here, but are used in different
+        # places in the code; it's fragile, but we exploit that fact to
+        # distinguish the two uses.
+        mock_api().container_requests().index().execute.return_value = {'items': []}  # child_crs
+        mock_api().container_requests().list().execute.side_effect = items  # parent request
         mock_api().container_requests().get().execute.return_value = self.fake_request
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
@@ -102,12 +110,31 @@ class SummarizeContainer(ReportDiff):
                 return UTF8Decode(gzip.open(self.arvmountlog))
         mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
-            ['--container', self.fake_request['uuid']])
+            self.arg_strings)
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
         self.diff_known_report(self.reportfile, cmd)
 
 
+
+class SummarizeContainer(SummarizeContainerCommon):
+    uuid = '9tee4-dz642-lymtndkpy39eibk'
+    reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid)
+    arg_strings = ['--container', uuid, '-v', '-v']
+
+    def test_container(self):
+        self.check_common()
+
+
+class SummarizeContainerRequest(SummarizeContainerCommon):
+    uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y'
+    reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid)
+    arg_strings = ['--container-request', uuid, '-v', '-v']
+
+    def test_container_request(self):
+        self.check_common()
+
+
 class SummarizeJob(ReportDiff):
     fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
     fake_log_id = 'fake-log-collection-id'