X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/26446197bb00d7f899673b7f9434e6bd622b8904..ff3686b372cd598ee990acd990af2b04daf91ada:/sdk/cli/bin/arv-run-pipeline-instance diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index e0e002fc7f..fc636dff50 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -74,6 +74,7 @@ $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set." begin + require 'arvados' require 'rubygems' require 'json' require 'pp' @@ -84,7 +85,7 @@ rescue LoadError => l abort <<-EOS #{$0}: fatal: #{l.message} Some runtime dependencies may be missing. -Try: gem install pp google-api-client json trollop +Try: gem install arvados pp google-api-client json trollop EOS end @@ -216,6 +217,7 @@ $client ||= Google::APIClient. :application_name => File.split($0).last, :application_version => $application_version.to_s) $arvados = $client.discovered_api('arvados', $arvados_api_version) +$arv = Arvados.new api_version: 'v1' class PipelineInstance @@ -224,10 +226,10 @@ class PipelineInstance :parameters => { :uuid => uuid }, - :body => { - :api_token => ENV['ARVADOS_API_TOKEN'] - }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] debuglog "Failed to get pipeline_instance: #{j[:errors] rescue nil}", 0 @@ -240,10 +242,12 @@ class PipelineInstance def self.create(attributes) result = $client.execute(:api_method => $arvados.pipeline_instances.create, :body => { - :api_token => ENV['ARVADOS_API_TOKEN'], :pipeline_instance => attributes }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] abort "Failed to create pipeline_instance: #{j[:errors] rescue nil} #{j.inspect}" @@ -257,10 +261,12 @@ class PipelineInstance :uuid => @pi[:uuid] }, :body => { - :api_token => ENV['ARVADOS_API_TOKEN'], :pipeline_instance => @attributes_to_update.to_json }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] debuglog "Failed to save pipeline_instance: #{j[:errors] rescue nil}", 0 @@ -289,20 +295,24 @@ class JobCache @cache ||= {} result = $client.execute(:api_method => $arvados.jobs.get, :parameters => { - :api_token => ENV['ARVADOS_API_TOKEN'], :uuid => uuid }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) @cache[uuid] = JSON.parse result.body, :symbolize_names => true end def self.where(conditions) result = $client.execute(:api_method => $arvados.jobs.list, :parameters => { - :api_token => ENV['ARVADOS_API_TOKEN'], :limit => 10000, :where => conditions.to_json }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) list = JSON.parse result.body, :symbolize_names => true if list and list[:items].is_a? Array list[:items] @@ -310,19 +320,21 @@ class JobCache [] end end - def self.create(attributes) + def self.create(job, create_params) @cache ||= {} result = $client.execute(:api_method => $arvados.jobs.create, - :parameters => { - :api_token => ENV['ARVADOS_API_TOKEN'], - :job => attributes.to_json - }, - :authenticated => false) + :body => { + :job => job.to_json + }.merge(create_params), + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) j = JSON.parse result.body, :symbolize_names => true if j.is_a? Hash and j[:uuid] @cache[j[:uuid]] = j else - debuglog "create job: #{j[:errors] rescue nil} with attribute #{attributes}", 0 + debuglog "create job: #{j[:errors] rescue nil} with attributes #{job}", 0 nil end end @@ -346,10 +358,12 @@ class WhRunPipelineInstance else result = $client.execute(:api_method => $arvados.pipeline_templates.get, :parameters => { - :api_token => ENV['ARVADOS_API_TOKEN'], :uuid => template }, - :authenticated => false) + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + }) @template = JSON.parse result.body, :symbolize_names => true if !@template[:uuid] abort "#{$0}: fatal: failed to retrieve pipeline template #{template} #{@template[:errors].inspect rescue nil}" @@ -411,33 +425,62 @@ class WhRunPipelineInstance end def setup_instance - @instance ||= PipelineInstance. - create(:components => @components, + if $options[:submit] + @instance ||= PipelineInstance. + create(:components => @components, + :pipeline_template_uuid => @template[:uuid], + :state => 'New') + else + @instance ||= PipelineInstance. + create(:components => @components, :pipeline_template_uuid => @template[:uuid], - :active => true) + :state => 'RunningOnClient') + end self end def run moretodo = true + interrupted = false + while moretodo moretodo = false @components.each do |cname, c| job = nil - + owner_uuid = @instance[:owner_uuid] + # Is the job satisfying this component already known to be + # finished? (Already meaning "before we query API server about + # the job's current state") + c_already_finished = (c[:job] && + c[:job][:uuid] && + !c[:job][:success].nil?) if !c[:job] and c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty? # No job yet associated with this component and is component inputs # are fully specified (any output_of script_parameters are resolved # to real value) - job = JobCache.create({:script => c[:script], - :script_parameters => c[:script_parameters], - :script_version => c[:script_version], - :repository => c[:repository], - :minimum_script_version => c[:minimum_script_version], - :exclude_script_versions => c[:exclude_minimum_script_versions], - :nondeterministic => c[:nondeterministic], - :no_reuse => @options[:no_reuse]}) + job = JobCache.create({ + :script => c[:script], + :script_parameters => c[:script_parameters], + :script_version => c[:script_version], + :repository => c[:repository], + :nondeterministic => c[:nondeterministic], + :output_is_persistent => c[:output_is_persistent] || false, + :owner_uuid => owner_uuid, + # TODO: Delete the following three attributes when + # supporting pre-20140418 API servers is no longer + # important. New API servers take these as flags that + # control behavior of create, rather than job attributes. + :minimum_script_version => c[:minimum_script_version], + :exclude_script_versions => c[:exclude_minimum_script_versions], + :no_reuse => @options[:no_reuse] || c[:nondeterministic], + }, { + # This is the right place to put these attributes when + # dealing with new API servers. + :minimum_script_version => c[:minimum_script_version], + :exclude_script_versions => c[:exclude_minimum_script_versions], + :find_or_create => !(@options[:no_reuse] || c[:nondeterministic]), + }) if job debuglog "component #{cname} new job #{job[:uuid]}" c[:job] = job @@ -450,7 +493,7 @@ class WhRunPipelineInstance if (c[:job][:running] or not (c[:job][:finished_at] or c[:job][:cancelled_at])) # Job is running so update copy of job record - c[:job] = JobCache.get(c[:job][:uuid]) + c[:job] = JobCache.get(c[:job][:uuid]) end if c[:job][:success] @@ -465,6 +508,44 @@ class WhRunPipelineInstance end end end + unless c_already_finished + # This is my first time discovering that the job + # succeeded. (At the top of this loop, I was still + # waiting for it to finish.) + if c[:output_is_persistent] + # I need to make sure a resources/wants link is in + # place to protect the output from garbage + # collection. (Normally Crunch does this for me, but + # here I might be reusing the output of someone else's + # job and I need to make sure it's understood that the + # output is valuable to me, too.) + wanted = c[:job][:output] + debuglog "checking for existing persistence link for #{wanted}" + @my_user_uuid ||= $arv.user.current[:uuid] + links = $arv.link.list(limit: 1, + filters: + [%w(link_class = resources), + %w(name = wants), + %w(tail_uuid =) + [@my_user_uuid], + %w(head_uuid =) + [wanted] + ])[:items] + if links.any? + debuglog "link already exists, uuid #{links.first[:uuid]}" + else + newlink = $arv.link.create link: \ + { + link_class: 'resources', + name: 'wants', + tail_kind: 'arvados#user', + tail_uuid: @my_user_uuid, + head_kind: 'arvados#collection', + head_uuid: wanted, + owner_uuid: owner_uuid + } + debuglog "added link, uuid #{newlink[:uuid]}" + end + end + end elsif c[:job][:running] || (!c[:job][:started_at] && !c[:job][:cancelled_at]) # Job is still running @@ -475,7 +556,6 @@ class WhRunPipelineInstance end end @instance[:components] = @components - @instance[:active] = moretodo report_status if @options[:no_wait] @@ -487,7 +567,8 @@ class WhRunPipelineInstance sleep 10 rescue Interrupt debuglog "interrupt", 0 - abort + interrupted = true + break end end end @@ -507,18 +588,31 @@ class WhRunPipelineInstance end end end - - if ended == @components.length or failed > 0 - @instance[:active] = false - @instance[:success] = (succeeded == @components.length) + + success = (succeeded == @components.length) + + if interrupted + if success + @instance[:state] = 'Complete' + else + @instance[:state] = 'Paused' + end + else + if ended == @components.length or failed > 0 + @instance[:state] = success ? 'Complete' : 'Failed' + end end + # set components_summary + components_summary = {"todo" => @components.length - ended, "done" => succeeded, "failed" => failed} + @instance[:components_summary] = components_summary + @instance.save end def cleanup - if @instance - @instance[:active] = false + if @instance and @instance[:state] == 'RunningOnClient' + @instance[:state] = 'Paused' @instance.save end end