Make sure persistence links get added for persistent components, even
authorTom Clegg <tom@curoverse.com>
Mon, 24 Mar 2014 04:55:04 +0000 (00:55 -0400)
committerTom Clegg <tom@curoverse.com>
Mon, 24 Mar 2014 04:55:04 +0000 (00:55 -0400)
when reusing jobs that are not marked persistent or belong to other
users.

sdk/cli/arvados-cli.gemspec
sdk/cli/bin/arv-run-pipeline-instance

index 5551fec063b618da5130e82ad475c608cc570ef2..c43e3b8c1f26c2570961de26babb46cbec3b9998 100644 (file)
@@ -18,6 +18,7 @@ Gem::Specification.new do |s|
   s.executables << "arv-run-pipeline-instance"
   s.executables << "arv-crunch-job"
   s.executables << "arv-tag"
+  s.add_runtime_dependency 'arvados', '~> 0.1.0'
   s.add_runtime_dependency 'google-api-client', '~> 0.6.3'
   s.add_runtime_dependency 'activesupport', '~> 3.2', '>= 3.2.13'
   s.add_runtime_dependency 'json', '~> 1.7', '>= 1.7.7'
index 09f894ae2c39b8dee6d620414f4e67755e5df435..7578abc7b0099f8cbba81aaf4775e0ac15741ec0 100755 (executable)
@@ -79,6 +79,7 @@ $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or
   abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set."
 
 begin
+  require 'arvados'
   require 'rubygems'
   require 'json'
   require 'pp'
@@ -89,7 +90,7 @@ rescue LoadError => l
   abort <<-EOS
 #{$0}: fatal: #{l.message}
 Some runtime dependencies may be missing.
-Try: gem install pp google-api-client json trollop
+Try: gem install arvados pp google-api-client json trollop
   EOS
 end
 
@@ -225,6 +226,7 @@ $client ||= Google::APIClient.
       :application_name => File.split($0).last,
       :application_version => $application_version.to_s)
 $arvados = $client.discovered_api('arvados', $arvados_api_version)
+$arv = Arvados.new api_version: 'v1'
 
 
 class PipelineInstance
@@ -433,6 +435,9 @@ class WhRunPipelineInstance
       moretodo = false
       @components.each do |cname, c|
         job = nil
+        c_already_finished = (c[:job] &&
+                              c[:job][:uuid] &&
+                              !c[:job][:success].nil?)
         if !c[:job] and
             c[:script_parameters].select { |pname, p| p.is_a? Hash }.empty?
           # Job is fully specified (all parameter values are present) but
@@ -524,6 +529,41 @@ class WhRunPipelineInstance
                 end
               end
             end
+            unless c_already_finished
+              if c[:output_is_persistent]
+                # This is my first time discovering that the job
+                # succeeded. I need to make sure a resources/wants
+                # link is in place to protect the output from garbage
+                # collection. (Normally Crunch does this for me, but
+                # here I might be reusing the output of someone else's
+                # job and I need to make sure it's understood that the
+                # output is valuable to me, too.)
+                wanted = c[:job][:output]
+                debuglog "checking for existing persistence link for #{wanted}"
+                @my_user_uuid ||= $arv.user.current[:uuid]
+                links = $arv.link.list(limit: 1,
+                                       filters:
+                                       [%w(link_class = resources),
+                                        %w(name = wants),
+                                        %w(tail_uuid =) + [@my_user_uuid],
+                                        %w(head_uuid =) + [wanted]
+                                       ])[:items]
+                if links.any?
+                  debuglog "link already exists, uuid #{links.first[:uuid]}"
+                else
+                  newlink = $arv.link.create link: \
+                  {
+                    link_class: 'resources',
+                    name: 'wants',
+                    tail_kind: 'arvados#user',
+                    tail_uuid: @my_user_uuid,
+                    head_kind: 'arvados#collection',
+                    head_uuid: wanted
+                  }
+                  debuglog "added link, uuid #{newlink[:uuid]}"
+                end
+              end
+            end
           elsif c[:job][:running] ||
               (!c[:job][:started_at] && !c[:job][:cancelled_at])
             moretodo = true