Merge branch 'master' into 4232-slow-pipes-n-jobs
[arvados.git] / services / api / app / models / job.rb
1 class Job < ArvadosModel
2   include HasUuid
3   include KindAndEtag
4   include CommonApiTemplate
5   attr_protected :arvados_sdk_version, :docker_image_locator
6   serialize :script_parameters, Hash
7   serialize :runtime_constraints, Hash
8   serialize :tasks_summary, Hash
9   before_create :ensure_unique_submit_id
10   after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
11   before_validation :set_priority
12   before_validation :update_state_from_old_state_attrs
13   validate :ensure_script_version_is_commit
14   validate :find_arvados_sdk_version
15   validate :find_docker_image_locator
16   validate :validate_status
17   validate :validate_state_change
18   validate :ensure_no_collection_uuids_in_script_params
19   before_save :update_timestamps_when_state_changes
20
21   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
22   has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid)
23
24   class SubmitIdReused < StandardError
25   end
26
27   api_accessible :user, extend: :common do |t|
28     t.add :submit_id
29     t.add :priority
30     t.add :script
31     t.add :script_parameters
32     t.add :script_version
33     t.add :cancelled_at
34     t.add :cancelled_by_client_uuid
35     t.add :cancelled_by_user_uuid
36     t.add :started_at
37     t.add :finished_at
38     t.add :output
39     t.add :success
40     t.add :running
41     t.add :state
42     t.add :is_locked_by_uuid
43     t.add :log
44     t.add :runtime_constraints
45     t.add :tasks_summary
46     t.add :nondeterministic
47     t.add :repository
48     t.add :supplied_script_version
49     t.add :arvados_sdk_version
50     t.add :docker_image_locator
51     t.add :queue_position
52     t.add :node_uuids
53     t.add :description
54   end
55
56   # Supported states for a job
57   States = [
58             (Queued = 'Queued'),
59             (Running = 'Running'),
60             (Cancelled = 'Cancelled'),
61             (Failed = 'Failed'),
62             (Complete = 'Complete'),
63            ]
64
65   def assert_finished
66     update_attributes(finished_at: finished_at || Time.now,
67                       success: success.nil? ? false : success,
68                       running: false)
69   end
70
71   def node_uuids
72     nodes.map(&:uuid)
73   end
74
75   def self.queue
76     self.where('state = ?', Queued).order('priority desc, created_at')
77   end
78
79   def queue_position
80     Job::queue.each_with_index do |job, index|
81       if job[:uuid] == self.uuid
82         return index
83       end
84     end
85     nil
86   end
87
88   def self.running
89     self.where('running = ?', true).
90       order('priority desc, created_at')
91   end
92
93   def lock locked_by_uuid
94     transaction do
95       self.reload
96       unless self.state == Queued and self.is_locked_by_uuid.nil?
97         raise AlreadyLockedError
98       end
99       self.state = Running
100       self.is_locked_by_uuid = locked_by_uuid
101       self.save!
102     end
103   end
104
105   protected
106
107   def foreign_key_attributes
108     super + %w(output log)
109   end
110
111   def skip_uuid_read_permission_check
112     super + %w(cancelled_by_client_uuid)
113   end
114
115   def skip_uuid_existence_check
116     super + %w(output log)
117   end
118
119   def set_priority
120     if self.priority.nil?
121       self.priority = 0
122     end
123     true
124   end
125
126   def ensure_script_version_is_commit
127     if self.state == Running
128       # Apparently client has already decided to go for it. This is
129       # needed to run a local job using a local working directory
130       # instead of a commit-ish.
131       return true
132     end
133     if new_record? or script_version_changed?
134       sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
135       if sha1
136         self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
137         self.script_version = sha1
138       else
139         self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
140         return false
141       end
142     end
143   end
144
145   def ensure_unique_submit_id
146     if !submit_id.nil?
147       if Job.where('submit_id=?',self.submit_id).first
148         raise SubmitIdReused.new
149       end
150     end
151     true
152   end
153
154   def resolve_runtime_constraint(key, attr_sym)
155     if ((runtime_constraints.is_a? Hash) and
156         (search = runtime_constraints[key]))
157       ok, result = yield search
158     else
159       ok, result = true, nil
160     end
161     if ok
162       send("#{attr_sym}=".to_sym, result)
163     else
164       errors.add(attr_sym, result)
165     end
166     ok
167   end
168
169   def find_arvados_sdk_version
170     resolve_runtime_constraint("arvados_sdk_version",
171                                :arvados_sdk_version) do |git_search|
172       commits = Commit.find_commit_range(current_user, "arvados",
173                                          nil, git_search, nil)
174       if commits.nil? or commits.empty?
175         [false, "#{git_search} does not resolve to a commit"]
176       elsif not runtime_constraints["docker_image"]
177         [false, "cannot be specified without a Docker image constraint"]
178       else
179         [true, commits.first]
180       end
181     end
182   end
183
184   def find_docker_image_locator
185     resolve_runtime_constraint("docker_image",
186                                :docker_image_locator) do |image_search|
187       image_tag = runtime_constraints['docker_image_tag']
188       if coll = Collection.for_latest_docker_image(image_search, image_tag)
189         [true, coll.portable_data_hash]
190       else
191         [false, "not found for #{image_search}"]
192       end
193     end
194   end
195
196   # def dependencies
197   #   deps = {}
198   #   queue = self.script_parameters.values
199   #   while not queue.empty?
200   #     queue = queue.flatten.compact.collect do |v|
201   #       if v.is_a? Hash
202   #         v.values
203   #       elsif v.is_a? String
204   #         v.match(/^(([0-9a-f]{32})\b(\+[^,]+)?,?)*$/) do |locator|
205   #           deps[locator.to_s] = true
206   #         end
207   #         nil
208   #       end
209   #     end
210   #   end
211   #   deps.keys
212   # end
213
214   def permission_to_update
215     if is_locked_by_uuid_was and !(current_user and
216                                    (current_user.uuid == is_locked_by_uuid_was or
217                                     current_user.uuid == system_user.uuid))
218       if script_changed? or
219           script_parameters_changed? or
220           script_version_changed? or
221           (!cancelled_at_was.nil? and
222            (cancelled_by_client_uuid_changed? or
223             cancelled_by_user_uuid_changed? or
224             cancelled_at_changed?)) or
225           started_at_changed? or
226           finished_at_changed? or
227           running_changed? or
228           success_changed? or
229           output_changed? or
230           log_changed? or
231           tasks_summary_changed? or
232           state_changed?
233         logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
234         return false
235       end
236     end
237     if !is_locked_by_uuid_changed?
238       super
239     else
240       if !current_user
241         logger.warn "Anonymous user tried to change lock on #{self.class.to_s} #{uuid_was}"
242         false
243       elsif is_locked_by_uuid_was and is_locked_by_uuid_was != current_user.uuid
244         logger.warn "User #{current_user.uuid} tried to steal lock on #{self.class.to_s} #{uuid_was} from #{is_locked_by_uuid_was}"
245         false
246       elsif !is_locked_by_uuid.nil? and is_locked_by_uuid != current_user.uuid
247         logger.warn "User #{current_user.uuid} tried to lock #{self.class.to_s} #{uuid_was} with uuid #{is_locked_by_uuid}"
248         false
249       else
250         super
251       end
252     end
253   end
254
255   def update_modified_by_fields
256     if self.cancelled_at_changed?
257       # Ensure cancelled_at cannot be set to arbitrary non-now times,
258       # or changed once it is set.
259       if self.cancelled_at and not self.cancelled_at_was
260         self.cancelled_at = Time.now
261         self.cancelled_by_user_uuid = current_user.uuid
262         self.cancelled_by_client_uuid = current_api_client.andand.uuid
263         @need_crunch_dispatch_trigger = true
264       else
265         self.cancelled_at = self.cancelled_at_was
266         self.cancelled_by_user_uuid = self.cancelled_by_user_uuid_was
267         self.cancelled_by_client_uuid = self.cancelled_by_client_uuid_was
268       end
269     end
270     super
271   end
272
273   def trigger_crunch_dispatch_if_cancelled
274     if @need_crunch_dispatch_trigger
275       File.open(Rails.configuration.crunch_refresh_trigger, 'wb') do
276         # That's all, just create/touch a file for crunch-job to see.
277       end
278     end
279   end
280
281   def update_timestamps_when_state_changes
282     return if not (state_changed? or new_record?)
283
284     case state
285     when Running
286       self.started_at ||= Time.now
287     when Failed, Complete
288       self.finished_at ||= Time.now
289     when Cancelled
290       self.cancelled_at ||= Time.now
291     end
292
293     # TODO: Remove the following case block when old "success" and
294     # "running" attrs go away. Until then, this ensures we still
295     # expose correct success/running flags to older clients, even if
296     # some new clients are writing only the new state attribute.
297     case state
298     when Queued
299       self.running = false
300       self.success = nil
301     when Running
302       self.running = true
303       self.success = nil
304     when Cancelled, Failed
305       self.running = false
306       self.success = false
307     when Complete
308       self.running = false
309       self.success = true
310     end
311     self.running ||= false # Default to false instead of nil.
312
313     @need_crunch_dispatch_trigger = true
314
315     true
316   end
317
318   def update_state_from_old_state_attrs
319     # If a client has touched the legacy state attrs, update the
320     # "state" attr to agree with the updated values of the legacy
321     # attrs.
322     #
323     # TODO: Remove this method when old "success" and "running" attrs
324     # go away.
325     if cancelled_at_changed? or
326         success_changed? or
327         running_changed? or
328         state.nil?
329       if cancelled_at
330         self.state = Cancelled
331       elsif success == false
332         self.state = Failed
333       elsif success == true
334         self.state = Complete
335       elsif running == true
336         self.state = Running
337       else
338         self.state = Queued
339       end
340     end
341     true
342   end
343
344   def validate_status
345     if self.state.in?(States)
346       true
347     else
348       errors.add :state, "#{state.inspect} must be one of: #{States.inspect}"
349       false
350     end
351   end
352
353   def validate_state_change
354     ok = true
355     if self.state_changed?
356       ok = case self.state_was
357            when nil
358              # state isn't set yet
359              true
360            when Queued
361              # Permit going from queued to any state
362              true
363            when Running
364              # From running, may only transition to a finished state
365              [Complete, Failed, Cancelled].include? self.state
366            when Complete, Failed, Cancelled
367              # Once in a finished state, don't permit any more state changes
368              false
369            else
370              # Any other state transition is also invalid
371              false
372            end
373       if not ok
374         errors.add :state, "invalid change from #{self.state_was} to #{self.state}"
375       end
376     end
377     ok
378   end
379
380   def ensure_no_collection_uuids_in_script_params
381     # recursive_hash_search searches recursively through hashes and
382     # arrays in 'thing' for string fields matching regular expression
383     # 'pattern'.  Returns true if pattern is found, false otherwise.
384     def recursive_hash_search thing, pattern
385       if thing.is_a? Hash
386         thing.each do |k, v|
387           return true if recursive_hash_search v, pattern
388         end
389       elsif thing.is_a? Array
390         thing.each do |k|
391           return true if recursive_hash_search k, pattern
392         end
393       elsif thing.is_a? String
394         return true if thing.match pattern
395       end
396       false
397     end
398
399     # Fail validation if any script_parameters field includes a string containing a
400     # collection uuid pattern.
401     if self.script_parameters_changed?
402       if recursive_hash_search(self.script_parameters, Collection.uuid_regex)
403         self.errors.add :script_parameters, "must use portable_data_hash instead of collection uuid"
404         return false
405       end
406     end
407     true
408   end
409 end