#!/usr/bin/env ruby
-# == Synopsis
-#
-# arv-run-pipeline-instance --template pipeline-template-uuid [options] [--] [parameters]
-# arv-run-pipeline-instance --instance pipeline-instance-uuid [options]
-#
-# Satisfy a pipeline template by finding or submitting a mapreduce job
-# for each pipeline component.
-#
-# == Options
-#
-# [--template uuid] Use the specified pipeline template.
-#
-# [--template path] Load the pipeline template from the specified
-# local file.
-#
-# [--instance uuid] Use the specified pipeline instance.
-#
-# [-n, --dry-run] Do not start any new jobs or wait for existing jobs
-# to finish. Just find out whether jobs are finished,
-# queued, or running for each component
-#
-# [--submit] Do not try to satisfy any components. Just
-# create an instance, print its UUID to
-# stdout, and exit.
-#
-# [--no-wait] Make only as much progress as possible without entering
-# a sleep/poll loop.
-#
-# [--no-reuse] Do not reuse existing jobs to satisfy pipeline
-# components. Submit a new job for every component.
-#
-# [--debug] Print extra debugging information on stderr.
-#
-# [--debug-level N] Increase amount of debugging information. Default
-# 1, possible range 0..3.
-#
-# [--status-text path] Print plain text status report to a file or
-# fifo. Default: /dev/stdout
-#
-# [--status-json path] Print JSON status report to a file or
-# fifo. Default: /dev/null
-#
-# [--description] Description for the pipeline instance.
-#
-# == Parameters
-#
-# [param_name=param_value]
-#
-# [param_name param_value] Set (or override) the default value for
-# every parameter with the given name.
-#
-# [component_name::param_name=param_value]
-# [component_name::param_name param_value]
-# [--component_name::param_name=param_value]
-# [--component_name::param_name param_value] Set the value of a
-# parameter for a single
-# component.
-#
class WhRunPipelineInstance
end
EOS
end
-$arvados_api_version = ENV['ARVADOS_API_VERSION'] || 'v1'
-$arvados_api_host = ENV['ARVADOS_API_HOST'] or
- abort "#{$0}: fatal: ARVADOS_API_HOST environment variable not set."
-$arvados_api_token = ENV['ARVADOS_API_TOKEN'] or
- abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set."
-
begin
require 'arvados'
require 'rubygems'
require 'trollop'
require 'google/api_client'
rescue LoadError => l
- puts $:
+ $stderr.puts $:
abort <<-EOS
#{$0}: fatal: #{l.message}
Some runtime dependencies may be missing.
$stderr.puts "#{File.split($0).last} #{$$}: #{message}" if $debuglevel >= verbosity
end
-module Kernel
- def suppress_warnings
- original_verbosity = $VERBOSE
- $VERBOSE = nil
- result = yield
- $VERBOSE = original_verbosity
- return result
- end
-end
-
-if $arvados_api_host.match /local/
- # You probably don't care about SSL certificate checks if you're
- # testing with a dev server.
- suppress_warnings { OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE }
-end
-
-
# Parse command line options (the kind that control the behavior of
# this program, that is, not the pipeline component parameters).
p = Trollop::Parser.new do
version __FILE__
+ banner(<<EOF)
+
+Usage:
+ arv-run-pipeline-instance --template TEMPLATE_UUID [options] [--] [parameters]
+ arv-run-pipeline-instance --instance INSTANCE_UUID [options] [--] [parameters]
+
+Parameters:
+ param_name=param_value
+ param_name param_value
+ Set (or override) the default value for every
+ pipeline component parameter with the given
+ name.
+
+ component_name::param_name=param_value
+ component_name::param_name param_value
+ --component_name::param_name=param_value
+ --component_name::param_name param_value
+ Set the value of a parameter for a single
+ pipeline component.
+
+Options:
+EOF
opt(:dry_run,
"Do not start any new jobs or wait for existing jobs to finish. Just find out whether jobs are finished, queued, or running for each component.",
:type => :boolean,
"Description for the pipeline instance.",
:short => :none,
:type => :string)
+ opt(:project_uuid,
+ "UUID of the project for the pipeline instance.",
+ short: :none,
+ type: :string)
stop_on [:'--']
end
$options = Trollop::with_standard_exception_handling p do
abort "#{$0}: syntax error: --instance cannot be combined with --template or --submit."
end
elsif not $options[:template]
- puts "error: you must supply a --template or --instance."
+ $stderr.puts "error: you must supply a --template or --instance."
p.educate
abort
end
abort "#{$0}: error: you must supply --run-pipeline-here, --run-jobs-here, or --submit."
end
-# Suppress SSL certificate checks if ARVADOS_API_HOST_INSECURE
-
-module Kernel
- def suppress_warnings
- original_verbosity = $VERBOSE
- $VERBOSE = nil
- result = yield
- $VERBOSE = original_verbosity
- return result
- end
-end
-
-if ENV['ARVADOS_API_HOST_INSECURE']
- suppress_warnings { OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE }
-end
-
# Set up the API client.
$arv = Arvados.new api_version: 'v1'
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
j = JSON.parse result.body, :symbolize_names => true
unless j.is_a? Hash and j[:uuid]
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
j = JSON.parse result.body, :symbolize_names => true
unless j.is_a? Hash and j[:uuid]
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
j = JSON.parse result.body, :symbolize_names => true
unless j.is_a? Hash and j[:uuid]
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
@cache[uuid] = JSON.parse result.body, :symbolize_names => true
end
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
list = JSON.parse result.body, :symbolize_names => true
if list and list[:items].is_a? Array
:body_object => body,
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
j = JSON.parse result.body, :symbolize_names => true
if j.is_a? Hash and j[:uuid]
},
:authenticated => false,
:headers => {
- authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
+ authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
})
@template = JSON.parse result.body, :symbolize_names => true
if !@template[:uuid]
@components.each do |componentname, component|
component[:script_parameters].each do |parametername, parameter|
parameter = { :value => parameter } unless parameter.is_a? Hash
- value =
- (params["#{componentname}::#{parametername}"] ||
- parameter[:value] ||
- (parameter[:output_of].nil? &&
- (params[parametername.to_s] ||
- parameter[:default])) ||
- nil)
- if value.nil? and
- ![false,'false',0,'0'].index parameter[:required]
- if parameter[:output_of]
- next
+ if params.has_key?("#{componentname}::#{parametername}")
+ value = params["#{componentname}::#{parametername}"]
+ elsif parameter.has_key?(:value)
+ value = parameter[:value]
+ elsif parameter.has_key?(:output_of)
+ if !@components[parameter[:output_of].intern]
+ errors << [componentname, parametername, "output_of refers to nonexistent component '#{parameter[:output_of]}'"]
+ else
+ # value will be filled in later when the upstream
+ # component's output becomes known
end
+ next
+ elsif params.has_key?(parametername.to_s)
+ value = params[parametername.to_s]
+ elsif parameter.has_key?(:default)
+ value = parameter[:default]
+ elsif [false, 'false', 0, '0'].index(parameter[:required])
+ value = nil
+ else
errors << [componentname, parametername, "required parameter is missing"]
+ next
end
debuglog "parameter #{componentname}::#{parametername} == #{value}"
- component[:script_parameters][parametername] = value
+
+ component[:script_parameters][parametername] =
+ parameter.dup.merge(value: value)
end
end
if !errors.empty?
end
end
else
- description = $options[:description]
- description = ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : "")) if !description
- @instance = PipelineInstance.
- create(components: @components,
- properties: {
- run_options: {
- enable_job_reuse: !@options[:no_reuse]
- }
- },
- pipeline_template_uuid: @template[:uuid],
- description: description,
- state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient'))
+ description = $options[:description] ||
+ ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : ""))
+ instance_body = {
+ components: @components,
+ properties: {
+ run_options: {
+ enable_job_reuse: !@options[:no_reuse]
+ }
+ },
+ pipeline_template_uuid: @template[:uuid],
+ description: description,
+ state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')
+ }
+ if @options[:project_uuid]
+ instance_body[:owner_uuid] = @options[:project_uuid]
+ end
+ @instance = PipelineInstance.create(instance_body)
end
self
end
# the job's current state")
c_already_finished = (c[:job] &&
c[:job][:uuid] &&
- ["Complete", "Failed", "Cancelled"].include? c[:job][:state])
+ ["Complete", "Failed", "Cancelled"].include?(c[:job][:state]))
if !c[:job] and
c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty?
# No job yet associated with this component and is component inputs
my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}"
job = JobCache.create(@instance, cname, {
:script => c[:script],
- :script_parameters => c[:script_parameters],
+ :script_parameters => Hash[c[:script_parameters].map do |key, spec|
+ [key, spec[:value]]
+ end],
:script_version => c[:script_version],
:repository => c[:repository],
:nondeterministic => c[:nondeterministic],
:owner_uuid => owner_uuid,
:is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil),
:submit_id => my_submit_id,
- :state => (if @options[:run_jobs_here] then "Running" else "Queued")
+ :state => (if @options[:run_jobs_here] then "Running" else "Queued" end)
}, {
# This is the right place to put these attributes when
# dealing with new API servers.
end
if c[:job] and c[:job][:uuid]
- if c[:job][:state] == "Running"
- # Job is running so update copy of job record
+ if ["Running", "Queued"].include?(c[:job][:state])
+ # Job is running (or may be soon) so update copy of job record
c[:job] = JobCache.get(c[:job][:uuid])
end
c2[:script_parameters].each do |pname, p|
if p.is_a? Hash and p[:output_of] == cname.to_s
debuglog "parameter #{c2name}::#{pname} == #{c[:job][:output]}"
- c2[:script_parameters][pname] = c[:job][:output]
+ c2[:script_parameters][pname] = {value: c[:job][:output]}
moretodo = true
end
end
end
end
end
- elsif c[:job][:state] == "Running"
- # Job is still running
+ elsif ["Queued", "Running"].include? c[:job][:state]
+ # Job is running or queued to run, so indicate that pipeline
+ # should continue to run
moretodo = true
- elsif c[:job][:cancelled_at]
+ elsif c[:job][:state] == "Cancelled"
debuglog "component #{cname} job #{c[:job][:uuid]} cancelled."
+ moretodo = false
+ elsif c[:job][:state] == "Failed"
+ moretodo = false
end
end
end
end
end
- ended = @components.map { |cname, c|
- if c[:job] and ["Complete", "Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
-
- succeeded = @components.map { |cname, c|
- if c[:job] and ["Complete"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
-
- failed = @components.map { |cname, c|
- if c[:job] and ["Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
+ c_in_state = @components.values.group_by { |c|
+ c[:job] and c[:job][:state]
+ }
+ succeeded = c_in_state["Complete"].andand.count || 0
+ failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0)
+ ended = succeeded + failed
success = (succeeded == @components.length)
@components.each do |cname, c|
jstatus = if !c[:job]
"-"
- elsif c[:job][:state] == "Running"
- "#{c[:job][:tasks_summary].inspect}"
- elsif c[:job][:state] == "Complete"
- c[:job][:output]
- elsif c[:job][:state] == "Cancelled"
- "cancelled #{c[:job][:cancelled_at]}"
- elsif c[:job][:state] == "Failed"
- "failed #{c[:job][:finished_at]}"
- elsif c[:job][:state] == "Queued"
- "queued #{c[:job][:created_at]}"
+ else case c[:job][:state]
+ when "Running"
+ "#{c[:job][:tasks_summary].inspect}"
+ when "Complete"
+ c[:job][:output]
+ when "Cancelled"
+ "cancelled #{c[:job][:cancelled_at]}"
+ when "Failed"
+ "failed #{c[:job][:finished_at]}"
+ when "Queued"
+ "queued #{c[:job][:created_at]}"
+ end
end
f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}"
end