From 381f3ce2529a027cc0eb1c402b94135711658f6b Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 24 Mar 2014 00:55:04 -0400 Subject: [PATCH] Make sure persistence links get added for persistent components, even when reusing jobs that are not marked persistent or belong to other users. --- sdk/cli/arvados-cli.gemspec | 1 + sdk/cli/bin/arv-run-pipeline-instance | 42 ++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/sdk/cli/arvados-cli.gemspec b/sdk/cli/arvados-cli.gemspec index 5551fec063..c43e3b8c1f 100644 --- a/sdk/cli/arvados-cli.gemspec +++ b/sdk/cli/arvados-cli.gemspec @@ -18,6 +18,7 @@ Gem::Specification.new do |s| s.executables << "arv-run-pipeline-instance" s.executables << "arv-crunch-job" s.executables << "arv-tag" + s.add_runtime_dependency 'arvados', '~> 0.1.0' s.add_runtime_dependency 'google-api-client', '~> 0.6.3' s.add_runtime_dependency 'activesupport', '~> 3.2', '>= 3.2.13' s.add_runtime_dependency 'json', '~> 1.7', '>= 1.7.7' diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index 09f894ae2c..7578abc7b0 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -79,6 +79,7 @@ $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set." begin + require 'arvados' require 'rubygems' require 'json' require 'pp' @@ -89,7 +90,7 @@ rescue LoadError => l abort <<-EOS #{$0}: fatal: #{l.message} Some runtime dependencies may be missing. -Try: gem install pp google-api-client json trollop +Try: gem install arvados pp google-api-client json trollop EOS end @@ -225,6 +226,7 @@ $client ||= Google::APIClient. :application_name => File.split($0).last, :application_version => $application_version.to_s) $arvados = $client.discovered_api('arvados', $arvados_api_version) +$arv = Arvados.new api_version: 'v1' class PipelineInstance @@ -433,6 +435,9 @@ class WhRunPipelineInstance moretodo = false @components.each do |cname, c| job = nil + c_already_finished = (c[:job] && + c[:job][:uuid] && + !c[:job][:success].nil?) if !c[:job] and c[:script_parameters].select { |pname, p| p.is_a? Hash }.empty? # Job is fully specified (all parameter values are present) but @@ -524,6 +529,41 @@ class WhRunPipelineInstance end end end + unless c_already_finished + if c[:output_is_persistent] + # This is my first time discovering that the job + # succeeded. I need to make sure a resources/wants + # link is in place to protect the output from garbage + # collection. (Normally Crunch does this for me, but + # here I might be reusing the output of someone else's + # job and I need to make sure it's understood that the + # output is valuable to me, too.) + wanted = c[:job][:output] + debuglog "checking for existing persistence link for #{wanted}" + @my_user_uuid ||= $arv.user.current[:uuid] + links = $arv.link.list(limit: 1, + filters: + [%w(link_class = resources), + %w(name = wants), + %w(tail_uuid =) + [@my_user_uuid], + %w(head_uuid =) + [wanted] + ])[:items] + if links.any? + debuglog "link already exists, uuid #{links.first[:uuid]}" + else + newlink = $arv.link.create link: \ + { + link_class: 'resources', + name: 'wants', + tail_kind: 'arvados#user', + tail_uuid: @my_user_uuid, + head_kind: 'arvados#collection', + head_uuid: wanted + } + debuglog "added link, uuid #{newlink[:uuid]}" + end + end + end elsif c[:job][:running] || (!c[:job][:started_at] && !c[:job][:cancelled_at]) moretodo = true -- 2.30.2