X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9ad5b41bb28617a014410afde9eb90f758f491ef..548e0c54db524cb7317850d4dfd8f3ee0b93cdb0:/sdk/cli/bin/arv-run-pipeline-instance diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index 05fce2c9da..6dc82c5a20 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -1,80 +1,14 @@ #!/usr/bin/env ruby -# == Synopsis -# -# arv-run-pipeline-instance --template pipeline-template-uuid [options] [--] [parameters] -# arv-run-pipeline-instance --instance pipeline-instance-uuid [options] -# -# Satisfy a pipeline template by finding or submitting a mapreduce job -# for each pipeline component. -# -# == Options -# -# [--template uuid] Use the specified pipeline template. -# -# [--template path] Load the pipeline template from the specified -# local file. -# -# [--instance uuid] Use the specified pipeline instance. -# -# [-n, --dry-run] Do not start any new jobs or wait for existing jobs -# to finish. Just find out whether jobs are finished, -# queued, or running for each component -# -# [--submit] Do not try to satisfy any components. Just -# create an instance, print its UUID to -# stdout, and exit. -# -# [--no-wait] Make only as much progress as possible without entering -# a sleep/poll loop. -# -# [--no-reuse] Do not reuse existing jobs to satisfy pipeline -# components. Submit a new job for every component. -# -# [--debug] Print extra debugging information on stderr. -# -# [--debug-level N] Increase amount of debugging information. Default -# 1, possible range 0..3. -# -# [--status-text path] Print plain text status report to a file or -# fifo. Default: /dev/stdout -# -# [--status-json path] Print JSON status report to a file or -# fifo. Default: /dev/null -# -# [--description] Description for the pipeline instance. -# -# == Parameters -# -# [param_name=param_value] -# -# [param_name param_value] Set (or override) the default value for -# every parameter with the given name. -# -# [component_name::param_name=param_value] -# [component_name::param_name param_value] -# [--component_name::param_name=param_value] -# [--component_name::param_name param_value] Set the value of a -# parameter for a single -# component. -# class WhRunPipelineInstance end -$application_version = 1.0 - if RUBY_VERSION < '1.9.3' then abort <<-EOS #{$0.gsub(/^\.\//,'')} requires Ruby version 1.9.3 or higher. EOS end -$arvados_api_version = ENV['ARVADOS_API_VERSION'] || 'v1' -$arvados_api_host = ENV['ARVADOS_API_HOST'] or - abort "#{$0}: fatal: ARVADOS_API_HOST environment variable not set." -$arvados_api_token = ENV['ARVADOS_API_TOKEN'] or - abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set." - begin require 'arvados' require 'rubygems' @@ -83,7 +17,7 @@ begin require 'trollop' require 'google/api_client' rescue LoadError => l - puts $: + $stderr.puts $: abort <<-EOS #{$0}: fatal: #{l.message} Some runtime dependencies may be missing. @@ -95,43 +29,33 @@ def debuglog(message, verbosity=1) $stderr.puts "#{File.split($0).last} #{$$}: #{message}" if $debuglevel >= verbosity end -module Kernel - def suppress_warnings - original_verbosity = $VERBOSE - $VERBOSE = nil - result = yield - $VERBOSE = original_verbosity - return result - end -end - -if $arvados_api_host.match /local/ - # You probably don't care about SSL certificate checks if you're - # testing with a dev server. - suppress_warnings { OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE } -end - -class Google::APIClient - def discovery_document(api, version) - api = api.to_s - return @discovery_documents["#{api}:#{version}"] ||= - begin - response = self.execute!( - :http_method => :get, - :uri => self.discovery_uri(api, version), - :authenticated => false - ) - response.body.class == String ? JSON.parse(response.body) : response.body - end - end -end - - # Parse command line options (the kind that control the behavior of # this program, that is, not the pipeline component parameters). p = Trollop::Parser.new do version __FILE__ + banner(< :boolean, @@ -170,17 +94,29 @@ p = Trollop::Parser.new do :short => :none, :type => :string) opt(:submit, - "Do not try to satisfy any components. Just create a pipeline instance and output its UUID.", + "Submit the pipeline instance to the server, and exit. Let the Crunch dispatch service satisfy the components by finding/running jobs.", + :short => :none, + :type => :boolean) + opt(:run_pipeline_here, + "Manage the pipeline instance in-process. Submit jobs to Crunch as needed. Do not exit until the pipeline finishes (or fails).", + :short => :none, + :type => :boolean) + opt(:run_jobs_here, + "Run jobs in the local terminal session instead of submitting them to Crunch. Implies --run-pipeline-here. Note: this results in a significantly different job execution environment, and some Crunch features are not supported. It can be necessary to modify a pipeline in order to make it run this way.", :short => :none, :type => :boolean) opt(:run_here, - "Manage the pipeline in process.", + "Synonym for --run-jobs-here.", :short => :none, :type => :boolean) opt(:description, "Description for the pipeline instance.", :short => :none, :type => :string) + opt(:project_uuid, + "UUID of the project for the pipeline instance.", + short: :none, + type: :string) stop_on [:'--'] end $options = Trollop::with_standard_exception_handling p do @@ -188,45 +124,28 @@ $options = Trollop::with_standard_exception_handling p do end $debuglevel = $options[:debug_level] || ($options[:debug] && 1) || 0 +$options[:run_jobs_here] ||= $options[:run_here] # old flag name +$options[:run_pipeline_here] ||= $options[:run_jobs_here] # B requires A + if $options[:instance] if $options[:template] or $options[:submit] abort "#{$0}: syntax error: --instance cannot be combined with --template or --submit." end elsif not $options[:template] - puts "error: you must supply a --template or --instance." + $stderr.puts "error: you must supply a --template or --instance." p.educate abort end -if $options[:run_here] == $options[:submit] - abort "#{$0}: syntax error: you must supply either --run-here or --submit." -end - -# Suppress SSL certificate checks if ARVADOS_API_HOST_INSECURE - -module Kernel - def suppress_warnings - original_verbosity = $VERBOSE - $VERBOSE = nil - result = yield - $VERBOSE = original_verbosity - return result - end -end - -if ENV['ARVADOS_API_HOST_INSECURE'] - suppress_warnings { OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE } +if $options[:run_pipeline_here] == $options[:submit] + abort "#{$0}: error: you must supply --run-pipeline-here, --run-jobs-here, or --submit." end # Set up the API client. -$client ||= Google::APIClient. - new(:host => $arvados_api_host, - :application_name => File.split($0).last, - :application_version => $application_version.to_s) -$arvados = $client.discovered_api('arvados', $arvados_api_version) $arv = Arvados.new api_version: 'v1' - +$client = $arv.client +$arvados = $arv.arvados_api class PipelineInstance def self.find(uuid) @@ -236,7 +155,7 @@ class PipelineInstance }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] @@ -254,7 +173,7 @@ class PipelineInstance }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] @@ -273,7 +192,7 @@ class PipelineInstance }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) j = JSON.parse result.body, :symbolize_names => true unless j.is_a? Hash and j[:uuid] @@ -317,7 +236,7 @@ class JobCache }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) @cache[uuid] = JSON.parse result.body, :symbolize_names => true end @@ -329,7 +248,7 @@ class JobCache }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) list = JSON.parse result.body, :symbolize_names => true if list and list[:items].is_a? Array @@ -347,7 +266,7 @@ class JobCache :body_object => body, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) j = JSON.parse result.body, :symbolize_names => true if j.is_a? Hash and j[:uuid] @@ -391,7 +310,7 @@ class WhRunPipelineInstance }, :authenticated => false, :headers => { - authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN'] + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] }) @template = JSON.parse result.body, :symbolize_names => true if !@template[:uuid] @@ -455,12 +374,17 @@ class WhRunPipelineInstance if value.nil? and ![false,'false',0,'0'].index parameter[:required] if parameter[:output_of] + if not @components[parameter[:output_of].intern] + errors << [componentname, parametername, "output_of refers to nonexistent component '#{parameter[:output_of]}'"] + end next end errors << [componentname, parametername, "required parameter is missing"] end debuglog "parameter #{componentname}::#{parametername} == #{value}" - component[:script_parameters][parametername] = value + + component[:script_parameters][parametername] = + parameter.dup.merge(value: value) end end if !errors.empty? @@ -484,18 +408,23 @@ class WhRunPipelineInstance end end else - description = $options[:description] - description = ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : "")) if !description - @instance = PipelineInstance. - create(components: @components, - properties: { - run_options: { - enable_job_reuse: !@options[:no_reuse] - } - }, - pipeline_template_uuid: @template[:uuid], - description: description, - state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')) + description = $options[:description] || + ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : "")) + instance_body = { + components: @components, + properties: { + run_options: { + enable_job_reuse: !@options[:no_reuse] + } + }, + pipeline_template_uuid: @template[:uuid], + description: description, + state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient') + } + if @options[:project_uuid] + instance_body[:owner_uuid] = @options[:project_uuid] + end + @instance = PipelineInstance.create(instance_body) end self end @@ -519,20 +448,26 @@ class WhRunPipelineInstance # the job's current state") c_already_finished = (c[:job] && c[:job][:uuid] && - !c[:job][:success].nil?) + ["Complete", "Failed", "Cancelled"].include?(c[:job][:state])) if !c[:job] and c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty? # No job yet associated with this component and is component inputs # are fully specified (any output_of script_parameters are resolved # to real value) + my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}" job = JobCache.create(@instance, cname, { :script => c[:script], - :script_parameters => c[:script_parameters], + :script_parameters => Hash[c[:script_parameters].map do |key, spec| + [key, spec[:value]] + end], :script_version => c[:script_version], :repository => c[:repository], :nondeterministic => c[:nondeterministic], :runtime_constraints => c[:runtime_constraints], :owner_uuid => owner_uuid, + :is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil), + :submit_id => my_submit_id, + :state => (if @options[:run_jobs_here] then "Running" else "Queued" end) }, { # This is the right place to put these attributes when # dealing with new API servers. @@ -545,27 +480,62 @@ class WhRunPipelineInstance if job debuglog "component #{cname} new job #{job[:uuid]}" c[:job] = job + c[:run_in_process] = (@options[:run_jobs_here] and + job[:submit_id] == my_submit_id) else debuglog "component #{cname} new job failed", 0 job_creation_failed += 1 end end + if c[:job] and c[:run_in_process] and not ["Complete", "Failed", "Cancelled"].include? c[:job][:state] + report_status + begin + require 'open3' + Open3.popen3("arv-crunch-job", "--force-unlock", + "--job", c[:job][:uuid]) do |stdin, stdout, stderr, wait_thr| + debuglog "arv-crunch-job pid #{wait_thr.pid} started", 0 + stdin.close + while true + rready, wready, = IO.select([stdout, stderr], []) + break if !rready[0] + begin + buf = rready[0].read_nonblock(2**20) + rescue EOFError + break + end + (rready[0] == stdout ? $stdout : $stderr).write(buf) + end + stdout.close + stderr.close + debuglog "arv-crunch-job pid #{wait_thr.pid} exit #{wait_thr.value.to_i}", 0 + end + if not $arv.job.get(uuid: c[:job][:uuid])[:finished_at] + raise Exception.new("arv-crunch-job did not set finished_at.") + end + rescue Exception => e + debuglog "Interrupted (#{e}). Failing job.", 0 + $arv.job.update(uuid: c[:job][:uuid], + job: { + state: "Failed" + }) + end + end + if c[:job] and c[:job][:uuid] - if (c[:job][:running] or - not (c[:job][:finished_at] or c[:job][:cancelled_at])) - # Job is running so update copy of job record + if ["Running", "Queued"].include?(c[:job][:state]) + # Job is running (or may be soon) so update copy of job record c[:job] = JobCache.get(c[:job][:uuid]) end - if c[:job][:success] + if c[:job][:state] == "Complete" # Populate script_parameters of other components waiting for # this job @components.each do |c2name, c2| c2[:script_parameters].each do |pname, p| if p.is_a? Hash and p[:output_of] == cname.to_s debuglog "parameter #{c2name}::#{pname} == #{c[:job][:output]}" - c2[:script_parameters][pname] = c[:job][:output] + c2[:script_parameters][pname] = {value: c[:job][:output]} moretodo = true end end @@ -575,14 +545,12 @@ class WhRunPipelineInstance # succeeded. (At the top of this loop, I was still # waiting for it to finish.) - debuglog "names: #{@instance[:name]} #{@template[:name]}", 0 - if (not @instance[:name].nil?) and (not @instance[:name].empty?) + if @instance[:name].andand.length.andand > 0 pipeline_name = @instance[:name] - elsif @instance[:pipeline_template_uuid] - fetch_template(@instance[:pipeline_template_uuid]) + elsif @template.andand[:name].andand.length.andand > 0 pipeline_name = @template[:name] else - pipeline_name = "pipeline started #{@instance[:started_at]}" + pipeline_name = @instance[:uuid] end if c[:output_name] != false # Create a collection located in the same project as the pipeline with the contents of the output. @@ -625,12 +593,15 @@ class WhRunPipelineInstance end end end - elsif c[:job][:running] || - (!c[:job][:started_at] && !c[:job][:cancelled_at]) - # Job is still running + elsif ["Queued", "Running"].include? c[:job][:state] + # Job is running or queued to run, so indicate that pipeline + # should continue to run moretodo = true - elsif c[:job][:cancelled_at] + elsif c[:job][:state] == "Cancelled" debuglog "component #{cname} job #{c[:job][:uuid]} cancelled." + moretodo = false + elsif c[:job][:state] == "Failed" + moretodo = false end end end @@ -657,21 +628,12 @@ class WhRunPipelineInstance end end - ended = 0 - succeeded = 0 - failed = 0 - @components.each do |cname, c| - if c[:job] - if c[:job][:finished_at] or c[:job][:cancelled_at] or (c[:job][:running] == false and c[:job][:success] == false) - ended += 1 - if c[:job][:success] == true - succeeded += 1 - elsif c[:job][:success] == false or c[:job][:cancelled_at] - failed += 1 - end - end - end - end + c_in_state = @components.values.group_by { |c| + c[:job] and c[:job][:state] + } + succeeded = c_in_state["Complete"].andand.count || 0 + failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0) + ended = succeeded + failed success = (succeeded == @components.length) @@ -737,18 +699,18 @@ class WhRunPipelineInstance @components.each do |cname, c| jstatus = if !c[:job] "-" - elsif c[:job][:running] - "#{c[:job][:tasks_summary].inspect}" - elsif c[:job][:success] - c[:job][:output] - elsif c[:job][:cancelled_at] - "cancelled #{c[:job][:cancelled_at]}" - elsif c[:job][:finished_at] - "failed #{c[:job][:finished_at]}" - elsif c[:job][:started_at] - "started #{c[:job][:started_at]}" - else - "queued #{c[:job][:created_at]}" + else case c[:job][:state] + when "Running" + "#{c[:job][:tasks_summary].inspect}" + when "Complete" + c[:job][:output] + when "Cancelled" + "cancelled #{c[:job][:cancelled_at]}" + when "Failed" + "failed #{c[:job][:finished_at]}" + when "Queued" + "queued #{c[:job][:created_at]}" + end end f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}" end