Add 'tools/arvbox/' from commit 'd3d368758db1f4a9fa5b89f77b5ee61d68ef5b72'
[arvados.git] / services / api / app / models / job.rb
1 class Job < ArvadosModel
2   include HasUuid
3   include KindAndEtag
4   include CommonApiTemplate
5   attr_protected :arvados_sdk_version, :docker_image_locator
6   serialize :script_parameters, Hash
7   serialize :runtime_constraints, Hash
8   serialize :tasks_summary, Hash
9   before_create :ensure_unique_submit_id
10   after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
11   before_validation :set_priority
12   before_validation :update_state_from_old_state_attrs
13   validate :ensure_script_version_is_commit
14   validate :find_docker_image_locator
15   validate :find_arvados_sdk_version
16   validate :validate_status
17   validate :validate_state_change
18   validate :ensure_no_collection_uuids_in_script_params
19   before_save :tag_version_in_internal_repository
20   before_save :update_timestamps_when_state_changes
21
22   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
23   has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid)
24
25   class SubmitIdReused < StandardError
26   end
27
28   api_accessible :user, extend: :common do |t|
29     t.add :submit_id
30     t.add :priority
31     t.add :script
32     t.add :script_parameters
33     t.add :script_version
34     t.add :cancelled_at
35     t.add :cancelled_by_client_uuid
36     t.add :cancelled_by_user_uuid
37     t.add :started_at
38     t.add :finished_at
39     t.add :output
40     t.add :success
41     t.add :running
42     t.add :state
43     t.add :is_locked_by_uuid
44     t.add :log
45     t.add :runtime_constraints
46     t.add :tasks_summary
47     t.add :nondeterministic
48     t.add :repository
49     t.add :supplied_script_version
50     t.add :arvados_sdk_version
51     t.add :docker_image_locator
52     t.add :queue_position
53     t.add :node_uuids
54     t.add :description
55   end
56
57   # Supported states for a job
58   States = [
59             (Queued = 'Queued'),
60             (Running = 'Running'),
61             (Cancelled = 'Cancelled'),
62             (Failed = 'Failed'),
63             (Complete = 'Complete'),
64            ]
65
66   def assert_finished
67     update_attributes(finished_at: finished_at || db_current_time,
68                       success: success.nil? ? false : success,
69                       running: false)
70   end
71
72   def node_uuids
73     nodes.map(&:uuid)
74   end
75
76   def self.queue
77     self.where('state = ?', Queued).order('priority desc, created_at')
78   end
79
80   def queue_position
81     Job::queue.each_with_index do |job, index|
82       if job[:uuid] == self.uuid
83         return index
84       end
85     end
86     nil
87   end
88
89   def self.running
90     self.where('running = ?', true).
91       order('priority desc, created_at')
92   end
93
94   def lock locked_by_uuid
95     transaction do
96       self.reload
97       unless self.state == Queued and self.is_locked_by_uuid.nil?
98         raise AlreadyLockedError
99       end
100       self.state = Running
101       self.is_locked_by_uuid = locked_by_uuid
102       self.save!
103     end
104   end
105
106   protected
107
108   def foreign_key_attributes
109     super + %w(output log)
110   end
111
112   def skip_uuid_read_permission_check
113     super + %w(cancelled_by_client_uuid)
114   end
115
116   def skip_uuid_existence_check
117     super + %w(output log)
118   end
119
120   def set_priority
121     if self.priority.nil?
122       self.priority = 0
123     end
124     true
125   end
126
127   def ensure_script_version_is_commit
128     if state == Running
129       # Apparently client has already decided to go for it. This is
130       # needed to run a local job using a local working directory
131       # instead of a commit-ish.
132       return true
133     end
134     if new_record? or repository_changed? or script_version_changed?
135       sha1 = Commit.find_commit_range(repository,
136                                       nil, script_version, nil).first
137       if not sha1
138         errors.add :script_version, "#{script_version} does not resolve to a commit"
139         return false
140       end
141       if supplied_script_version.nil? or supplied_script_version.empty?
142         self.supplied_script_version = script_version
143       end
144       self.script_version = sha1
145     end
146     true
147   end
148
149   def tag_version_in_internal_repository
150     if state == Running
151       # No point now. See ensure_script_version_is_commit.
152       true
153     elsif errors.any?
154       # Won't be saved, and script_version might not even be valid.
155       true
156     elsif new_record? or repository_changed? or script_version_changed?
157       uuid_was = uuid
158       begin
159         assign_uuid
160         Commit.tag_in_internal_repository repository, script_version, uuid
161       rescue
162         uuid = uuid_was
163         raise
164       end
165     end
166   end
167
168   def ensure_unique_submit_id
169     if !submit_id.nil?
170       if Job.where('submit_id=?',self.submit_id).first
171         raise SubmitIdReused.new
172       end
173     end
174     true
175   end
176
177   def resolve_runtime_constraint(key, attr_sym)
178     if ((runtime_constraints.is_a? Hash) and
179         (search = runtime_constraints[key]))
180       ok, result = yield search
181     else
182       ok, result = true, nil
183     end
184     if ok
185       send("#{attr_sym}=".to_sym, result)
186     else
187       errors.add(attr_sym, result)
188     end
189     ok
190   end
191
192   def find_arvados_sdk_version
193     resolve_runtime_constraint("arvados_sdk_version",
194                                :arvados_sdk_version) do |git_search|
195       commits = Commit.find_commit_range("arvados",
196                                          nil, git_search, nil)
197       if commits.empty?
198         [false, "#{git_search} does not resolve to a commit"]
199       elsif not runtime_constraints["docker_image"]
200         [false, "cannot be specified without a Docker image constraint"]
201       else
202         [true, commits.first]
203       end
204     end
205   end
206
207   def find_docker_image_locator
208     runtime_constraints['docker_image'] =
209         Rails.configuration.default_docker_image_for_jobs if ((runtime_constraints.is_a? Hash) and
210                                                               (runtime_constraints['docker_image']).nil? and
211                                                               Rails.configuration.default_docker_image_for_jobs)
212     resolve_runtime_constraint("docker_image",
213                                :docker_image_locator) do |image_search|
214       image_tag = runtime_constraints['docker_image_tag']
215       if coll = Collection.for_latest_docker_image(image_search, image_tag)
216         [true, coll.portable_data_hash]
217       else
218         [false, "not found for #{image_search}"]
219       end
220     end
221   end
222
223   def permission_to_update
224     if is_locked_by_uuid_was and !(current_user and
225                                    (current_user.uuid == is_locked_by_uuid_was or
226                                     current_user.uuid == system_user.uuid))
227       if script_changed? or
228           script_parameters_changed? or
229           script_version_changed? or
230           (!cancelled_at_was.nil? and
231            (cancelled_by_client_uuid_changed? or
232             cancelled_by_user_uuid_changed? or
233             cancelled_at_changed?)) or
234           started_at_changed? or
235           finished_at_changed? or
236           running_changed? or
237           success_changed? or
238           output_changed? or
239           log_changed? or
240           tasks_summary_changed? or
241           state_changed?
242         logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
243         return false
244       end
245     end
246     if !is_locked_by_uuid_changed?
247       super
248     else
249       if !current_user
250         logger.warn "Anonymous user tried to change lock on #{self.class.to_s} #{uuid_was}"
251         false
252       elsif is_locked_by_uuid_was and is_locked_by_uuid_was != current_user.uuid
253         logger.warn "User #{current_user.uuid} tried to steal lock on #{self.class.to_s} #{uuid_was} from #{is_locked_by_uuid_was}"
254         false
255       elsif !is_locked_by_uuid.nil? and is_locked_by_uuid != current_user.uuid
256         logger.warn "User #{current_user.uuid} tried to lock #{self.class.to_s} #{uuid_was} with uuid #{is_locked_by_uuid}"
257         false
258       else
259         super
260       end
261     end
262   end
263
264   def update_modified_by_fields
265     if self.cancelled_at_changed?
266       # Ensure cancelled_at cannot be set to arbitrary non-now times,
267       # or changed once it is set.
268       if self.cancelled_at and not self.cancelled_at_was
269         self.cancelled_at = db_current_time
270         self.cancelled_by_user_uuid = current_user.uuid
271         self.cancelled_by_client_uuid = current_api_client.andand.uuid
272         @need_crunch_dispatch_trigger = true
273       else
274         self.cancelled_at = self.cancelled_at_was
275         self.cancelled_by_user_uuid = self.cancelled_by_user_uuid_was
276         self.cancelled_by_client_uuid = self.cancelled_by_client_uuid_was
277       end
278     end
279     super
280   end
281
282   def trigger_crunch_dispatch_if_cancelled
283     if @need_crunch_dispatch_trigger
284       File.open(Rails.configuration.crunch_refresh_trigger, 'wb') do
285         # That's all, just create/touch a file for crunch-job to see.
286       end
287     end
288   end
289
290   def update_timestamps_when_state_changes
291     return if not (state_changed? or new_record?)
292
293     case state
294     when Running
295       self.started_at ||= db_current_time
296     when Failed, Complete
297       self.finished_at ||= db_current_time
298     when Cancelled
299       self.cancelled_at ||= db_current_time
300     end
301
302     # TODO: Remove the following case block when old "success" and
303     # "running" attrs go away. Until then, this ensures we still
304     # expose correct success/running flags to older clients, even if
305     # some new clients are writing only the new state attribute.
306     case state
307     when Queued
308       self.running = false
309       self.success = nil
310     when Running
311       self.running = true
312       self.success = nil
313     when Cancelled, Failed
314       self.running = false
315       self.success = false
316     when Complete
317       self.running = false
318       self.success = true
319     end
320     self.running ||= false # Default to false instead of nil.
321
322     @need_crunch_dispatch_trigger = true
323
324     true
325   end
326
327   def update_state_from_old_state_attrs
328     # If a client has touched the legacy state attrs, update the
329     # "state" attr to agree with the updated values of the legacy
330     # attrs.
331     #
332     # TODO: Remove this method when old "success" and "running" attrs
333     # go away.
334     if cancelled_at_changed? or
335         success_changed? or
336         running_changed? or
337         state.nil?
338       if cancelled_at
339         self.state = Cancelled
340       elsif success == false
341         self.state = Failed
342       elsif success == true
343         self.state = Complete
344       elsif running == true
345         self.state = Running
346       else
347         self.state = Queued
348       end
349     end
350     true
351   end
352
353   def validate_status
354     if self.state.in?(States)
355       true
356     else
357       errors.add :state, "#{state.inspect} must be one of: #{States.inspect}"
358       false
359     end
360   end
361
362   def validate_state_change
363     ok = true
364     if self.state_changed?
365       ok = case self.state_was
366            when nil
367              # state isn't set yet
368              true
369            when Queued
370              # Permit going from queued to any state
371              true
372            when Running
373              # From running, may only transition to a finished state
374              [Complete, Failed, Cancelled].include? self.state
375            when Complete, Failed, Cancelled
376              # Once in a finished state, don't permit any more state changes
377              false
378            else
379              # Any other state transition is also invalid
380              false
381            end
382       if not ok
383         errors.add :state, "invalid change from #{self.state_was} to #{self.state}"
384       end
385     end
386     ok
387   end
388
389   def ensure_no_collection_uuids_in_script_params
390     # recursive_hash_search searches recursively through hashes and
391     # arrays in 'thing' for string fields matching regular expression
392     # 'pattern'.  Returns true if pattern is found, false otherwise.
393     def recursive_hash_search thing, pattern
394       if thing.is_a? Hash
395         thing.each do |k, v|
396           return true if recursive_hash_search v, pattern
397         end
398       elsif thing.is_a? Array
399         thing.each do |k|
400           return true if recursive_hash_search k, pattern
401         end
402       elsif thing.is_a? String
403         return true if thing.match pattern
404       end
405       false
406     end
407
408     # Fail validation if any script_parameters field includes a string containing a
409     # collection uuid pattern.
410     if self.script_parameters_changed?
411       if recursive_hash_search(self.script_parameters, Collection.uuid_regex)
412         self.errors.add :script_parameters, "must use portable_data_hash instead of collection uuid"
413         return false
414       end
415     end
416     true
417   end
418 end