3859: Implement Job lock method on api server. This takes a queued job and
[arvados.git] / services / api / app / controllers / arvados / v1 / jobs_controller.rb
1 class Arvados::V1::JobsController < ApplicationController
2   accept_attribute_as_json :script_parameters, Hash
3   accept_attribute_as_json :runtime_constraints, Hash
4   accept_attribute_as_json :tasks_summary, Hash
5   skip_before_filter :find_object_by_uuid, :only => [:queue, :queue_size]
6   skip_before_filter :render_404_if_no_object, :only => [:queue, :queue_size]
7
8   def create
9     [:repository, :script, :script_version, :script_parameters].each do |r|
10       if !resource_attrs[r]
11         return send_error("#{r} attribute must be specified",
12                           status: :unprocessable_entity)
13       end
14     end
15
16     # We used to ask for the minimum_, exclude_, and no_reuse params
17     # in the job resource. Now we advertise them as flags that alter
18     # the behavior of the create action.
19     [:minimum_script_version, :exclude_script_versions].each do |attr|
20       if resource_attrs.has_key? attr
21         params[attr] = resource_attrs.delete attr
22       end
23     end
24     if resource_attrs.has_key? :no_reuse
25       params[:find_or_create] = !resource_attrs.delete(:no_reuse)
26     end
27
28     if params[:find_or_create]
29       return if false.equal?(load_filters_param)
30       if @filters.empty?  # Translate older creation parameters into filters.
31         @filters =
32           [["repository", "=", resource_attrs[:repository]],
33            ["script", "=", resource_attrs[:script]],
34            ["script_version", "in git",
35             params[:minimum_script_version] || resource_attrs[:script_version]],
36            ["script_version", "not in git", params[:exclude_script_versions]],
37           ].reject { |filter| filter.last.nil? or filter.last.empty? }
38         if image_search = resource_attrs[:runtime_constraints].andand["docker_image"]
39           if image_tag = resource_attrs[:runtime_constraints]["docker_image_tag"]
40             image_search += ":#{image_tag}"
41           end
42           @filters.append(["docker_image_locator", "in docker", image_search])
43         else
44           @filters.append(["docker_image_locator", "=", nil])
45         end
46         begin
47           load_job_specific_filters
48         rescue ArgumentError => error
49           return send_error(error.message)
50         end
51       end
52
53       # Check specified filters for some reasonableness.
54       filter_names = @filters.map { |f| f.first }.uniq
55       ["repository", "script"].each do |req_filter|
56         if not filter_names.include?(req_filter)
57           return send_error("#{req_filter} filter required")
58         end
59       end
60
61       # Search for a reusable Job, and return it if found.
62       @objects = Job.readable_by(current_user)
63       apply_filters
64       @object = nil
65       incomplete_job = nil
66       @objects.each do |j|
67         if j.nondeterministic != true and
68             ((j.success == true and j.output != nil) or j.running == true) and
69             j.script_parameters == resource_attrs[:script_parameters]
70           if j.running && j.owner_uuid == current_user.uuid
71             # We'll use this if we don't find a job that has completed
72             incomplete_job ||= j
73           else
74             if Collection.readable_by(current_user).find_by_portable_data_hash(j.output)
75               # Record the first job in the list
76               if !@object
77                 @object = j
78               end
79               # Ensure that all candidate jobs actually did produce the same output
80               if @object.output != j.output
81                 @object = nil
82                 break
83               end
84             end
85           end
86         end
87         @object ||= incomplete_job
88         if @object
89           return show
90         end
91       end
92     end
93
94     super
95   end
96
97   def cancel
98     reload_object_before_update
99     @object.update_attributes! cancelled_at: Time.now
100     show
101   end
102
103   def lock
104     @object.lock current_user.uuid
105     show
106   end
107
108   class LogStreamer
109     Q_UPDATE_INTERVAL = 12
110     def initialize(job, opts={})
111       @job = job
112       @opts = opts
113     end
114     def each
115       if @job.finished_at
116         yield "#{@job.uuid} finished at #{@job.finished_at}\n"
117         return
118       end
119       while not @job.started_at
120         # send a summary (job queue + available nodes) to the client
121         # every few seconds while waiting for the job to start
122         last_ack_at ||= Time.now - Q_UPDATE_INTERVAL - 1
123         if Time.now - last_ack_at >= Q_UPDATE_INTERVAL
124           nodes_in_state = {idle: 0, alloc: 0}
125           ActiveRecord::Base.uncached do
126             Node.where('hostname is not ?', nil).collect do |n|
127               if n.info[:slurm_state]
128                 nodes_in_state[n.info[:slurm_state]] ||= 0
129                 nodes_in_state[n.info[:slurm_state]] += 1
130               end
131             end
132           end
133           job_queue = Job.queue
134           n_queued_before_me = 0
135           job_queue.each do |j|
136             break if j.uuid == @job.uuid
137             n_queued_before_me += 1
138           end
139           yield "#{Time.now}" \
140             " job #{@job.uuid}" \
141             " queue_position #{n_queued_before_me}" \
142             " queue_size #{job_queue.size}" \
143             " nodes_idle #{nodes_in_state[:idle]}" \
144             " nodes_alloc #{nodes_in_state[:alloc]}\n"
145           last_ack_at = Time.now
146         end
147         sleep 3
148         ActiveRecord::Base.uncached do
149           @job.reload
150         end
151       end
152     end
153   end
154
155   def queue
156     params[:order] ||= ['priority desc', 'created_at']
157     load_limit_offset_order_params
158     load_where_param
159     @where.merge!({
160                     started_at: nil,
161                     is_locked_by_uuid: nil,
162                     cancelled_at: nil,
163                     success: nil
164                   })
165     return if false.equal?(load_filters_param)
166     find_objects_for_index
167     index
168   end
169
170   def queue_size
171     # Users may not be allowed to see all the jobs in the queue, so provide a
172     # method to get just the queue size in order to get a gist of how busy the
173     # cluster is.
174     render :json => {:queue_size => Job.queue.size}
175   end
176
177   def self._create_requires_parameters
178     (super rescue {}).
179       merge({
180               find_or_create: {
181                 type: 'boolean', required: false, default: false
182               },
183               filters: {
184                 type: 'array', required: false
185               },
186               minimum_script_version: {
187                 type: 'string', required: false
188               },
189               exclude_script_versions: {
190                 type: 'array', required: false
191               },
192             })
193   end
194
195   def self._queue_requires_parameters
196     self._index_requires_parameters
197   end
198
199   protected
200
201   def load_job_specific_filters
202     # Convert Job-specific @filters entries into general SQL filters.
203     script_info = {"repository" => nil, "script" => nil}
204     script_range = {"exclude_versions" => []}
205     @filters.select! do |filter|
206       if (script_info.has_key? filter[0]) and (filter[1] == "=")
207         if script_info[filter[0]].nil?
208           script_info[filter[0]] = filter[2]
209         elsif script_info[filter[0]] != filter[2]
210           raise ArgumentError.new("incompatible #{filter[0]} filters")
211         end
212       end
213       case filter[0..1]
214       when ["script_version", "in git"]
215         script_range["min_version"] = filter.last
216         false
217       when ["script_version", "not in git"]
218         begin
219           script_range["exclude_versions"] += filter.last
220         rescue TypeError
221           script_range["exclude_versions"] << filter.last
222         end
223         false
224       when ["docker_image_locator", "in docker"], ["docker_image_locator", "not in docker"]
225         filter[1].sub!(/ docker$/, '')
226         search_list = filter[2].is_a?(Enumerable) ? filter[2] : [filter[2]]
227         filter[2] = search_list.flat_map do |search_term|
228           image_search, image_tag = search_term.split(':', 2)
229           Collection.find_all_for_docker_image(image_search, image_tag, @read_users).map(&:portable_data_hash)
230         end
231         true
232       else
233         true
234       end
235     end
236
237     # Build a real script_version filter from any "not? in git" filters.
238     if (script_range.size > 1) or script_range["exclude_versions"].any?
239       script_info.each_pair do |key, value|
240         if value.nil?
241           raise ArgumentError.new("script_version filter needs #{key} filter")
242         end
243       end
244       last_version = begin resource_attrs[:script_version] rescue "HEAD" end
245       version_range = Commit.find_commit_range(current_user,
246                                                script_info["repository"],
247                                                script_range["min_version"],
248                                                last_version,
249                                                script_range["exclude_versions"])
250       if version_range.nil?
251         raise ArgumentError.
252           new(["error searching #{script_info['repository']} from",
253                "'#{script_range['min_version']}' to '#{last_version}',",
254                "excluding #{script_range['exclude_versions']}"].join(" "))
255       end
256       @filters.append(["script_version", "in", version_range])
257     end
258   end
259
260   def load_filters_param
261     begin
262       super
263       load_job_specific_filters
264     rescue ArgumentError => error
265       send_error(error.message)
266       false
267     end
268   end
269 end