closes #7710
[arvados.git] / services / api / app / controllers / arvados / v1 / jobs_controller.rb
# API controller for Job resources: creation (optionally reusing an
# existing equivalent job), cancellation, locking, and queue inspection.
class Arvados::V1::JobsController < ApplicationController
  accept_attribute_as_json :script_parameters, Hash
  accept_attribute_as_json :runtime_constraints, Hash
  accept_attribute_as_json :tasks_summary, Hash
  # queue and queue_size do not address a single job, so the per-object
  # lookup filters are skipped for them.
  skip_before_filter :find_object_by_uuid, :only => [:queue, :queue_size]
  skip_before_filter :render_404_if_no_object, :only => [:queue, :queue_size]

  include DbCurrentTime

  # Create a job or, when params[:find_or_create] is set, return an
  # existing job that matches the request.
  #
  # The repository, script, script_version and script_parameters
  # attributes are mandatory; a missing one is reported with
  # send_error and status :unprocessable_entity.
  #
  # Reuse search: candidates are the jobs readable by the current user
  # that pass @filters, are not flagged nondeterministic, are Queued,
  # Running or Complete, and have identical script_parameters. A
  # candidate whose output collection is readable by the current user is
  # preferred; if two such candidates disagree about their output, none
  # of them is reused. Failing that, the first Queued/Running candidate
  # owned by the current user is used. If nothing is reusable, the
  # request falls through to the superclass create action.
  def create
    [:repository, :script, :script_version, :script_parameters].each do |r|
      if !resource_attrs[r]
        return send_error("#{r} attribute must be specified",
                          status: :unprocessable_entity)
      end
    end

    # We used to ask for the minimum_, exclude_, and no_reuse params
    # in the job resource. Now we advertise them as flags that alter
    # the behavior of the create action.
    [:minimum_script_version, :exclude_script_versions].each do |attr|
      if resource_attrs.has_key? attr
        params[attr] = resource_attrs.delete attr
      end
    end
    if resource_attrs.has_key? :no_reuse
      params[:find_or_create] = !resource_attrs.delete(:no_reuse)
    end

    if params[:find_or_create]
      # load_filters_param has already sent the error response when it
      # returns false.
      return if false.equal?(load_filters_param)
      if @filters.empty?  # Translate older creation parameters into filters.
        @filters =
          [["repository", "=", resource_attrs[:repository]],
           ["script", "=", resource_attrs[:script]],
           ["script_version", "not in git", params[:exclude_script_versions]],
          ].reject { |filter| filter.last.nil? or filter.last.empty? }
        if !params[:minimum_script_version].blank?
          @filters << ["script_version", "in git",
                       params[:minimum_script_version]]
        else
          add_default_git_filter("script_version", resource_attrs[:repository],
                                 resource_attrs[:script_version])
        end
        if image_search = resource_attrs[:runtime_constraints].andand["docker_image"]
          if image_tag = resource_attrs[:runtime_constraints]["docker_image_tag"]
            image_search += ":#{image_tag}"
          end
          image_locator = Collection.
            for_latest_docker_image(image_search).andand.portable_data_hash
        else
          image_locator = nil
        end
        @filters << ["docker_image_locator", "=", image_locator]
        if sdk_version = resource_attrs[:runtime_constraints].andand["arvados_sdk_version"]
          add_default_git_filter("arvados_sdk_version", "arvados", sdk_version)
        end
        # The translated filters may contain "in git"/"not in git"
        # entries, so they need the same conversion that
        # load_filters_param applied to client-supplied filters.
        begin
          load_job_specific_filters
        rescue ArgumentError => error
          return send_error(error.message)
        end
      end

      # Check specified filters for some reasonableness.
      filter_names = @filters.map { |f| f.first }.uniq
      ["repository", "script"].each do |req_filter|
        if not filter_names.include?(req_filter)
          return send_error("#{req_filter} filter required")
        end
      end

      # Search for a reusable Job, and return it if found.
      @objects = Job.readable_by(current_user)
      apply_filters
      @object = nil
      incomplete_job = nil
      @objects.each do |j|
        if j.nondeterministic != true and
            ["Queued", "Running", "Complete"].include?(j.state) and
            j.script_parameters == resource_attrs[:script_parameters]
          if j.state != "Complete" && j.owner_uuid == current_user.uuid
            # We'll use this if we don't find a job that has completed
            incomplete_job ||= j
          else
            if Collection.readable_by(current_user).find_by_portable_data_hash(j.output)
              # Record the first job in the list
              if !@object
                @object = j
              end
              # Ensure that all candidate jobs actually did produce the same output
              if @object.output != j.output
                @object = nil
                break
              end
            end
          end
        end
      end

      # Choose only after every candidate has been examined. Deciding
      # inside the loop returned the first qualifying job immediately,
      # which let an unfinished job win over a later completed one and
      # made the output-consistency check above unreachable.
      @object ||= incomplete_job
      if @object
        return show
      end
    end

    super
  end

  # Reload the job, set its state to Job::Cancelled (raising if the
  # update fails), and render it.
  def cancel
    reload_object_before_update
    @object.update_attributes! state: Job::Cancelled
    show
  end

  # Lock the job on behalf of the current user and render it.
  def lock
    @object.lock current_user.uuid
    show
  end

  # Enumerable-style producer of plain-text status lines for a job that
  # has not started yet. Each line reports the job's queue position, the
  # queue size, and node counts.
  class LogStreamer
    # db_current_time comes from this mixin. The enclosing controller's
    # own `include DbCurrentTime` does not apply to this nested class.
    include DbCurrentTime

    # Minimum gap between two summary lines; compared against a
    # difference of db_current_time values (presumably seconds --
    # confirm against DbCurrentTime).
    Q_UPDATE_INTERVAL = 12

    # job:: the Job record to report on.
    # opts:: stored in @opts; not read anywhere in this class.
    def initialize(job, opts={})
      @job = job
      @opts = opts
    end

    # Yield status lines to the given block.
    #
    # If the job already has a finished_at value, a single "finished"
    # line is yielded. Otherwise this loops until the job has a
    # started_at value, reloading the job after a 3-second sleep on each
    # pass and yielding a summary line whenever at least
    # Q_UPDATE_INTERVAL has elapsed since the previous one.
    def each
      if @job.finished_at
        yield "#{@job.uuid} finished at #{@job.finished_at}\n"
        return
      end
      last_ack_at = nil
      while not @job.started_at
        # send a summary (job queue + available nodes) to the client
        # every few seconds while waiting for the job to start
        current_time = db_current_time
        # Back-date the first timestamp so a summary goes out at once.
        last_ack_at ||= current_time - Q_UPDATE_INTERVAL - 1
        if current_time - last_ack_at >= Q_UPDATE_INTERVAL
          nodes_in_state = {idle: 0, alloc: 0}
          # uncached: bypass the ActiveRecord query cache so every pass
          # sees fresh rows.
          ActiveRecord::Base.uncached do
            # NOTE(review): the counters are keyed by the raw
            # slurm_state value while the report below reads the symbol
            # keys :idle and :alloc. If slurm_state is stored as a
            # String the reported counts stay 0 -- confirm against Node.
            Node.where('hostname is not ?', nil).each do |n|
              if n.info[:slurm_state]
                nodes_in_state[n.info[:slurm_state]] ||= 0
                nodes_in_state[n.info[:slurm_state]] += 1
              end
            end
          end
          job_queue = Job.queue
          n_queued_before_me = 0
          job_queue.each do |j|
            break if j.uuid == @job.uuid
            n_queued_before_me += 1
          end
          yield "#{db_current_time}" \
            " job #{@job.uuid}" \
            " queue_position #{n_queued_before_me}" \
            " queue_size #{job_queue.size}" \
            " nodes_idle #{nodes_in_state[:idle]}" \
            " nodes_alloc #{nodes_in_state[:alloc]}\n"
          last_ack_at = db_current_time
        end
        sleep 3
        ActiveRecord::Base.uncached do
          @job.reload
        end
      end
    end
  end

  # Index action restricted to jobs in state Job::Queued. The default
  # order is priority descending, then created_at.
  def queue
    params[:order] ||= ['priority desc', 'created_at']
    load_limit_offset_order_params
    load_where_param
    @where.merge!({state: Job::Queued})
    # load_filters_param has already sent the error response when it
    # returns false.
    return if false.equal?(load_filters_param)
    find_objects_for_index
    index
  end

  def queue_size
    # Users may not be allowed to see all the jobs in the queue, so provide a
    # method to get just the queue size in order to get a gist of how busy the
    # cluster is.
    render :json => {:queue_size => Job.queue.size}
  end

  # Parameters accepted by the create action: the superclass's set (an
  # empty hash if the super call raises) plus the job-reuse flags.
  def self._create_requires_parameters
    (super rescue {}).
      merge({
              find_or_create: {
                type: 'boolean', required: false, default: false
              },
              filters: {
                type: 'array', required: false
              },
              minimum_script_version: {
                type: 'string', required: false
              },
              exclude_script_versions: {
                type: 'array', required: false
              },
            })
  end

  # The queue action accepts the same parameters as index.
  def self._queue_requires_parameters
    self._index_requires_parameters
  end

  protected

  def add_default_git_filter(attr_name, repo_name, refspec)
    # Add a filter to @filters for `attr_name` = the latest commit available
    # in `repo_name` at `refspec`.  No filter is added if refspec can't be
    # resolved.
    commits = Commit.find_commit_range(repo_name, nil, refspec, nil)
    if commit_hash = commits.first
      @filters << [attr_name, "=", commit_hash]
    end
  end

  # Rewrite the Job-specific operators in @filters ("in git",
  # "not in git", "in docker", "not in docker") as plain "in"/"not in"
  # filters over commit hashes and image portable data hashes.
  #
  # Raises ArgumentError when "=" filters on repository or script
  # conflict, when a script_version git filter lacks a repository or
  # script filter, when a git filter names an unsupported attribute, or
  # when a commit range resolves to nothing.
  def load_job_specific_filters
    # Convert Job-specific @filters entries into general SQL filters.
    script_info = {"repository" => nil, "script" => nil}
    git_filters = Hash.new do |hash, key|
      hash[key] = {"max_version" => "HEAD", "exclude_versions" => []}
    end
    # Rewritten docker filters are collected here and appended after the
    # pass below, so @filters is not grown while select! is walking it.
    docker_filters = []
    @filters.select! do |(attr, operator, operand)|
      if (script_info.has_key? attr) and (operator == "=")
        if script_info[attr].nil?
          script_info[attr] = operand
        elsif script_info[attr] != operand
          raise ArgumentError.new("incompatible #{attr} filters")
        end
      end
      case operator
      when "in git"
        git_filters[attr]["min_version"] = operand
        false
      when "not in git"
        git_filters[attr]["exclude_versions"] += Array.wrap(operand)
        false
      when "in docker", "not in docker"
        image_hashes = Array.wrap(operand).flat_map do |search_term|
          image_search, image_tag = search_term.split(':', 2)
          Collection.
            find_all_for_docker_image(image_search, image_tag, @read_users).
            map(&:portable_data_hash)
        end
        # "in docker" becomes "in", "not in docker" becomes "not in".
        docker_filters << [attr, operator.sub(/ docker$/, ""), image_hashes]
        false
      else
        true
      end
    end
    @filters.concat(docker_filters)

    # Build a real script_version filter from any "not? in git" filters.
    git_filters.each_pair do |attr, filter|
      case attr
      when "script_version"
        script_info.each_pair do |key, value|
          if value.nil?
            raise ArgumentError.new("script_version filter needs #{key} filter")
          end
        end
        filter["repository"] = script_info["repository"]
        begin
          filter["max_version"] = resource_attrs[:script_version]
        rescue
          # Using HEAD, set earlier by the hash default, is fine.
        end
      when "arvados_sdk_version"
        filter["repository"] = "arvados"
      else
        raise ArgumentError.new("unknown attribute for git filter: #{attr}")
      end
      revisions = Commit.find_commit_range(filter["repository"],
                                           filter["min_version"],
                                           filter["max_version"],
                                           filter["exclude_versions"])
      if revisions.empty?
        raise ArgumentError.
          new("error searching #{filter['repository']} from " +
              "'#{filter['min_version']}' to '#{filter['max_version']}', " +
              "excluding #{filter['exclude_versions']}")
      end
      @filters << [attr, "in", revisions]
    end
  end

  # Load filters through the superclass, then convert the Job-specific
  # ones. On ArgumentError the message is sent to the client and false
  # is returned; callers test for that with false.equal?.
  def load_filters_param
    begin
      super
      load_job_specific_filters
    rescue ArgumentError => error
      send_error(error.message)
      false
    end
  end
end