X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4c4c295193379003fa6c94c9c06e3bdec574e265..7b5729d984a9c516920270250f050bf72f1896d1:/sdk/cli/bin/arv-run-pipeline-instance diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index 3c7da7dfab..63313fc808 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -42,6 +42,8 @@ # [--status-json path] Print JSON status report to a file or # fifo. Default: /dev/null # +# [--description] Description for the pipeline instance. +# # == Parameters # # [param_name=param_value] @@ -151,7 +153,7 @@ p = Trollop::Parser.new do :short => :none, :type => :string) opt(:submit, - "Submit the pipeline instance to the server, and exit. Let the Crunch dispatch service to satisfy the components by finding/running jobs.", + "Submit the pipeline instance to the server, and exit. Let the Crunch dispatch service satisfy the components by finding/running jobs.", :short => :none, :type => :boolean) opt(:run_pipeline_here, @@ -166,6 +168,10 @@ p = Trollop::Parser.new do "Synonym for --run-jobs-here.", :short => :none, :type => :boolean) + opt(:description, + "Description for the pipeline instance.", + :short => :none, + :type => :string) stop_on [:'--'] end $options = Trollop::with_standard_exception_handling p do @@ -439,12 +445,17 @@ class WhRunPipelineInstance if value.nil? and ![false,'false',0,'0'].index parameter[:required] if parameter[:output_of] + if not @components[parameter[:output_of].intern] + errors << [componentname, parametername, "output_of refers to nonexistent component '#{parameter[:output_of]}'"] + end next end errors << [componentname, parametername, "required parameter is missing"] end debuglog "parameter #{componentname}::#{parametername} == #{value}" - component[:script_parameters][parametername] = value + + component[:script_parameters][parametername] = + parameter.dup.merge(value: value) end end if !errors.empty? @@ -468,6 +479,8 @@ class WhRunPipelineInstance end end else + description = $options[:description] + description = ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : "")) if !description @instance = PipelineInstance. create(components: @components, properties: { @@ -476,6 +489,7 @@ class WhRunPipelineInstance } }, pipeline_template_uuid: @template[:uuid], + description: description, state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')) end self @@ -485,6 +499,10 @@ class WhRunPipelineInstance moretodo = true interrupted = false + if @instance[:started_at].nil? + @instance[:started_at] = Time.now + end + job_creation_failed = 0 while moretodo moretodo = false @@ -496,7 +514,7 @@ class WhRunPipelineInstance # the job's current state") c_already_finished = (c[:job] && c[:job][:uuid] && - !c[:job][:success].nil?) + ["Complete", "Failed", "Cancelled"].include?(c[:job][:state])) if !c[:job] and c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty? # No job yet associated with this component and is component inputs @@ -505,7 +523,9 @@ class WhRunPipelineInstance my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}" job = JobCache.create(@instance, cname, { :script => c[:script], - :script_parameters => c[:script_parameters], + :script_parameters => Hash[c[:script_parameters].map do |key, spec| + [key, spec[:value]] + end], :script_version => c[:script_version], :repository => c[:repository], :nondeterministic => c[:nondeterministic], @@ -513,6 +533,7 @@ class WhRunPipelineInstance :owner_uuid => owner_uuid, :is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil), :submit_id => my_submit_id, + :state => (if @options[:run_jobs_here] then "Running" else "Queued" end) }, { # This is the right place to put these attributes when # dealing with new API servers. @@ -533,7 +554,7 @@ class WhRunPipelineInstance end end - if c[:job] and c[:run_in_process] + if c[:job] and c[:run_in_process] and not ["Complete", "Failed", "Cancelled"].include? c[:job][:state] report_status begin require 'open3' @@ -562,28 +583,25 @@ class WhRunPipelineInstance debuglog "Interrupted (#{e}). Failing job.", 0 $arv.job.update(uuid: c[:job][:uuid], job: { - finished_at: Time.now, - running: false, - success: false + state: "Failed" }) end end if c[:job] and c[:job][:uuid] - if (c[:job][:running] or - not (c[:job][:finished_at] or c[:job][:cancelled_at])) - # Job is running so update copy of job record + if ["Running", "Queued"].include?(c[:job][:state]) + # Job is running (or may be soon) so update copy of job record c[:job] = JobCache.get(c[:job][:uuid]) end - if c[:job][:success] + if c[:job][:state] == "Complete" # Populate script_parameters of other components waiting for # this job @components.each do |c2name, c2| c2[:script_parameters].each do |pname, p| if p.is_a? Hash and p[:output_of] == cname.to_s debuglog "parameter #{c2name}::#{pname} == #{c[:job][:output]}" - c2[:script_parameters][pname] = c[:job][:output] + c2[:script_parameters][pname] = {value: c[:job][:output]} moretodo = true end end @@ -593,15 +611,14 @@ class WhRunPipelineInstance # succeeded. (At the top of this loop, I was still # waiting for it to finish.) - debuglog "names: #{@instance[:name]} #{@template[:name]}", 0 - if (not @instance[:name].nil?) and (not @instance[:name].empty?) + if @instance[:name].andand.length.andand > 0 pipeline_name = @instance[:name] - else - fetch_template(@instance[:pipeline_template_uuid]) + elsif @template.andand[:name].andand.length.andand > 0 pipeline_name = @template[:name] + else + pipeline_name = @instance[:uuid] end if c[:output_name] != false - output_name = c[:output_name] || "Output of #{cname} of #{pipeline_name}" # Create a collection located in the same project as the pipeline with the contents of the output. portable_data_hash = c[:job][:output] collections = $arv.collection.list(limit: 1, @@ -609,26 +626,48 @@ class WhRunPipelineInstance select: ["portable_data_hash", "manifest_text"] )[:items] if collections.any? - newcollection = { - owner_uuid: owner_uuid, - name: "#{output_name} at #{c[:job][:finished_at]}", - portable_data_hash: collections.first[:portable_data_hash], - manifest_text: collections.first[:manifest_text] - } - debuglog "Creating collection #{newcollection}", 0 - newcollection_actual = $arv.collection.create collection: newcollection + name = c[:output_name] || "Output #{portable_data_hash[0..7]} of #{cname} of #{pipeline_name}" + + # check if there is a name collision. + name_collisions = $arv.collection.list(filters: [["owner_uuid", "=", owner_uuid], + ["name", "=", name]])[:items] + + newcollection_actual = nil + if name_collisions.any? and name_collisions.first[:portable_data_hash] == portable_data_hash + # There is already a collection with the same name and the + # same contents, so just point to that. + newcollection_actual = name_collisions.first + end + + if newcollection_actual.nil? + # Did not find a collection with the same name (or the + # collection has a different portable data hash) so create + # a new collection with ensure_unique_name: true. + newcollection = { + owner_uuid: owner_uuid, + name: name, + portable_data_hash: collections.first[:portable_data_hash], + manifest_text: collections.first[:manifest_text] + } + debuglog "Creating collection #{newcollection}", 0 + newcollection_actual = $arv.collection.create collection: newcollection, ensure_unique_name: true + end + c[:output_uuid] = newcollection_actual[:uuid] else debuglog "Could not find a collection with portable data hash #{portable_data_hash}", 0 end end end - elsif c[:job][:running] || - (!c[:job][:started_at] && !c[:job][:cancelled_at]) - # Job is still running + elsif ["Queued", "Running"].include? c[:job][:state] + # Job is running or queued to run, so indicate that pipeline + # should continue to run moretodo = true - elsif c[:job][:cancelled_at] + elsif c[:job][:state] == "Cancelled" debuglog "component #{cname} job #{c[:job][:uuid]} cancelled." + moretodo = false + elsif c[:job][:state] == "Failed" + moretodo = false end end end @@ -655,21 +694,12 @@ class WhRunPipelineInstance end end - ended = 0 - succeeded = 0 - failed = 0 - @components.each do |cname, c| - if c[:job] - if c[:job][:finished_at] or c[:job][:cancelled_at] or (c[:job][:running] == false and c[:job][:success] == false) - ended += 1 - if c[:job][:success] == true - succeeded += 1 - elsif c[:job][:success] == false or c[:job][:cancelled_at] - failed += 1 - end - end - end - end + c_in_state = @components.values.group_by { |c| + c[:job] and c[:job][:state] + } + succeeded = c_in_state["Complete"].andand.count || 0 + failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0) + ended = succeeded + failed success = (succeeded == @components.length) @@ -692,6 +722,10 @@ class WhRunPipelineInstance end end + if @instance[:finished_at].nil? and ['Complete', 'Failed'].include? @instance[:state] + @instance[:finished_at] = Time.now + end + debuglog "pipeline instance state is #{@instance[:state]}" # set components_summary @@ -731,20 +765,18 @@ class WhRunPipelineInstance @components.each do |cname, c| jstatus = if !c[:job] "-" - elsif c[:job][:running] - "#{c[:job][:tasks_summary].inspect}" - elsif c[:job][:success] - c[:job][:output] - elsif c[:job][:cancelled_at] - "cancelled #{c[:job][:cancelled_at]}" - elsif c[:job][:finished_at] - "failed #{c[:job][:finished_at]}" - elsif c[:job][:started_at] - "started #{c[:job][:started_at]}" - elsif c[:job][:is_locked_by_uuid] - "starting #{c[:job][:started_at]}" - else - "queued #{c[:job][:created_at]}" + else case c[:job][:state] + when "Running" + "#{c[:job][:tasks_summary].inspect}" + when "Complete" + c[:job][:output] + when "Cancelled" + "cancelled #{c[:job][:cancelled_at]}" + when "Failed" + "failed #{c[:job][:finished_at]}" + when "Queued" + "queued #{c[:job][:created_at]}" + end end f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}" end @@ -757,6 +789,7 @@ class WhRunPipelineInstance if ["New", "Ready", "RunningOnClient", "RunningOnServer"].include?(@instance[:state]) @instance[:state] = "Failed" + @instance[:finished_at] = Time.now @instance.save end @instance.log_stderr(msg)