Merge branch 'master' of git.curoverse.com:arvados into 3408-production-datamanager
[arvados.git] / services / api / app / models / job.rb
1 class Job < ArvadosModel
2   include HasUuid
3   include KindAndEtag
4   include CommonApiTemplate
5   attr_protected :arvados_sdk_version, :docker_image_locator
6   serialize :script_parameters, Hash
7   serialize :runtime_constraints, Hash
8   serialize :tasks_summary, Hash
9   before_create :ensure_unique_submit_id
10   after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
11   before_validation :set_priority
12   before_validation :update_state_from_old_state_attrs
13   validate :ensure_script_version_is_commit
14   validate :find_arvados_sdk_version
15   validate :find_docker_image_locator
16   validate :validate_status
17   validate :validate_state_change
18   validate :ensure_no_collection_uuids_in_script_params
19   before_save :update_timestamps_when_state_changes
20
21   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
22   has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid)
23
24   class SubmitIdReused < StandardError
25   end
26
27   api_accessible :user, extend: :common do |t|
28     t.add :submit_id
29     t.add :priority
30     t.add :script
31     t.add :script_parameters
32     t.add :script_version
33     t.add :cancelled_at
34     t.add :cancelled_by_client_uuid
35     t.add :cancelled_by_user_uuid
36     t.add :started_at
37     t.add :finished_at
38     t.add :output
39     t.add :success
40     t.add :running
41     t.add :state
42     t.add :is_locked_by_uuid
43     t.add :log
44     t.add :runtime_constraints
45     t.add :tasks_summary
46     t.add :dependencies
47     t.add :nondeterministic
48     t.add :repository
49     t.add :supplied_script_version
50     t.add :arvados_sdk_version
51     t.add :docker_image_locator
52     t.add :queue_position
53     t.add :node_uuids
54     t.add :description
55   end
56
57   # Supported states for a job
58   States = [
59             (Queued = 'Queued'),
60             (Running = 'Running'),
61             (Cancelled = 'Cancelled'),
62             (Failed = 'Failed'),
63             (Complete = 'Complete'),
64            ]
65
66   def assert_finished
67     update_attributes(finished_at: finished_at || Time.now,
68                       success: success.nil? ? false : success,
69                       running: false)
70   end
71
72   def node_uuids
73     nodes.map(&:uuid)
74   end
75
76   def self.queue
77     self.where('state = ?', Queued).order('priority desc, created_at')
78   end
79
80   def queue_position
81     Job::queue.each_with_index do |job, index|
82       if job[:uuid] == self.uuid
83         return index
84       end
85     end
86     nil
87   end
88
89   def self.running
90     self.where('running = ?', true).
91       order('priority desc, created_at')
92   end
93
94   def lock locked_by_uuid
95     transaction do
96       self.reload
97       unless self.state == Queued and self.is_locked_by_uuid.nil?
98         raise AlreadyLockedError
99       end
100       self.state = Running
101       self.is_locked_by_uuid = locked_by_uuid
102       self.save!
103     end
104   end
105
106   protected
107
108   def foreign_key_attributes
109     super + %w(output log)
110   end
111
112   def skip_uuid_read_permission_check
113     super + %w(cancelled_by_client_uuid)
114   end
115
116   def skip_uuid_existence_check
117     super + %w(output log)
118   end
119
120   def set_priority
121     if self.priority.nil?
122       self.priority = 0
123     end
124     true
125   end
126
127   def ensure_script_version_is_commit
128     if self.state == Running
129       # Apparently client has already decided to go for it. This is
130       # needed to run a local job using a local working directory
131       # instead of a commit-ish.
132       return true
133     end
134     if new_record? or script_version_changed?
135       sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
136       if sha1
137         self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
138         self.script_version = sha1
139       else
140         self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
141         return false
142       end
143     end
144   end
145
146   def ensure_unique_submit_id
147     if !submit_id.nil?
148       if Job.where('submit_id=?',self.submit_id).first
149         raise SubmitIdReused.new
150       end
151     end
152     true
153   end
154
155   def resolve_runtime_constraint(key, attr_sym)
156     if ((runtime_constraints.is_a? Hash) and
157         (search = runtime_constraints[key]))
158       ok, result = yield search
159     else
160       ok, result = true, nil
161     end
162     if ok
163       send("#{attr_sym}=".to_sym, result)
164     else
165       errors.add(attr_sym, result)
166     end
167     ok
168   end
169
170   def find_arvados_sdk_version
171     resolve_runtime_constraint("arvados_sdk_version",
172                                :arvados_sdk_version) do |git_search|
173       commits = Commit.find_commit_range(current_user, "arvados",
174                                          nil, git_search, nil)
175       if commits.nil? or commits.empty?
176         [false, "#{git_search} does not resolve to a commit"]
177       elsif not runtime_constraints["docker_image"]
178         [false, "cannot be specified without a Docker image constraint"]
179       else
180         [true, commits.first]
181       end
182     end
183   end
184
185   def find_docker_image_locator
186     resolve_runtime_constraint("docker_image",
187                                :docker_image_locator) do |image_search|
188       image_tag = runtime_constraints['docker_image_tag']
189       if coll = Collection.for_latest_docker_image(image_search, image_tag)
190         [true, coll.portable_data_hash]
191       else
192         [false, "not found for #{image_search}"]
193       end
194     end
195   end
196
197   def dependencies
198     deps = {}
199     queue = self.script_parameters.values
200     while not queue.empty?
201       queue = queue.flatten.compact.collect do |v|
202         if v.is_a? Hash
203           v.values
204         elsif v.is_a? String
205           v.match(/^(([0-9a-f]{32})\b(\+[^,]+)?,?)*$/) do |locator|
206             deps[locator.to_s] = true
207           end
208           nil
209         end
210       end
211     end
212     deps.keys
213   end
214
215   def permission_to_update
216     if is_locked_by_uuid_was and !(current_user and
217                                    (current_user.uuid == is_locked_by_uuid_was or
218                                     current_user.uuid == system_user.uuid))
219       if script_changed? or
220           script_parameters_changed? or
221           script_version_changed? or
222           (!cancelled_at_was.nil? and
223            (cancelled_by_client_uuid_changed? or
224             cancelled_by_user_uuid_changed? or
225             cancelled_at_changed?)) or
226           started_at_changed? or
227           finished_at_changed? or
228           running_changed? or
229           success_changed? or
230           output_changed? or
231           log_changed? or
232           tasks_summary_changed? or
233           state_changed?
234         logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
235         return false
236       end
237     end
238     if !is_locked_by_uuid_changed?
239       super
240     else
241       if !current_user
242         logger.warn "Anonymous user tried to change lock on #{self.class.to_s} #{uuid_was}"
243         false
244       elsif is_locked_by_uuid_was and is_locked_by_uuid_was != current_user.uuid
245         logger.warn "User #{current_user.uuid} tried to steal lock on #{self.class.to_s} #{uuid_was} from #{is_locked_by_uuid_was}"
246         false
247       elsif !is_locked_by_uuid.nil? and is_locked_by_uuid != current_user.uuid
248         logger.warn "User #{current_user.uuid} tried to lock #{self.class.to_s} #{uuid_was} with uuid #{is_locked_by_uuid}"
249         false
250       else
251         super
252       end
253     end
254   end
255
256   def update_modified_by_fields
257     if self.cancelled_at_changed?
258       # Ensure cancelled_at cannot be set to arbitrary non-now times,
259       # or changed once it is set.
260       if self.cancelled_at and not self.cancelled_at_was
261         self.cancelled_at = Time.now
262         self.cancelled_by_user_uuid = current_user.uuid
263         self.cancelled_by_client_uuid = current_api_client.andand.uuid
264         @need_crunch_dispatch_trigger = true
265       else
266         self.cancelled_at = self.cancelled_at_was
267         self.cancelled_by_user_uuid = self.cancelled_by_user_uuid_was
268         self.cancelled_by_client_uuid = self.cancelled_by_client_uuid_was
269       end
270     end
271     super
272   end
273
274   def trigger_crunch_dispatch_if_cancelled
275     if @need_crunch_dispatch_trigger
276       File.open(Rails.configuration.crunch_refresh_trigger, 'wb') do
277         # That's all, just create/touch a file for crunch-job to see.
278       end
279     end
280   end
281
282   def update_timestamps_when_state_changes
283     return if not (state_changed? or new_record?)
284
285     case state
286     when Running
287       self.started_at ||= Time.now
288     when Failed, Complete
289       self.finished_at ||= Time.now
290     when Cancelled
291       self.cancelled_at ||= Time.now
292     end
293
294     # TODO: Remove the following case block when old "success" and
295     # "running" attrs go away. Until then, this ensures we still
296     # expose correct success/running flags to older clients, even if
297     # some new clients are writing only the new state attribute.
298     case state
299     when Queued
300       self.running = false
301       self.success = nil
302     when Running
303       self.running = true
304       self.success = nil
305     when Cancelled, Failed
306       self.running = false
307       self.success = false
308     when Complete
309       self.running = false
310       self.success = true
311     end
312     self.running ||= false # Default to false instead of nil.
313
314     @need_crunch_dispatch_trigger = true
315
316     true
317   end
318
319   def update_state_from_old_state_attrs
320     # If a client has touched the legacy state attrs, update the
321     # "state" attr to agree with the updated values of the legacy
322     # attrs.
323     #
324     # TODO: Remove this method when old "success" and "running" attrs
325     # go away.
326     if cancelled_at_changed? or
327         success_changed? or
328         running_changed? or
329         state.nil?
330       if cancelled_at
331         self.state = Cancelled
332       elsif success == false
333         self.state = Failed
334       elsif success == true
335         self.state = Complete
336       elsif running == true
337         self.state = Running
338       else
339         self.state = Queued
340       end
341     end
342     true
343   end
344
345   def validate_status
346     if self.state.in?(States)
347       true
348     else
349       errors.add :state, "#{state.inspect} must be one of: #{States.inspect}"
350       false
351     end
352   end
353
354   def validate_state_change
355     ok = true
356     if self.state_changed?
357       ok = case self.state_was
358            when nil
359              # state isn't set yet
360              true
361            when Queued
362              # Permit going from queued to any state
363              true
364            when Running
365              # From running, may only transition to a finished state
366              [Complete, Failed, Cancelled].include? self.state
367            when Complete, Failed, Cancelled
368              # Once in a finished state, don't permit any more state changes
369              false
370            else
371              # Any other state transition is also invalid
372              false
373            end
374       if not ok
375         errors.add :state, "invalid change from #{self.state_was} to #{self.state}"
376       end
377     end
378     ok
379   end
380
381   def ensure_no_collection_uuids_in_script_params
382     # recursive_hash_search searches recursively through hashes and
383     # arrays in 'thing' for string fields matching regular expression
384     # 'pattern'.  Returns true if pattern is found, false otherwise.
385     def recursive_hash_search thing, pattern
386       if thing.is_a? Hash
387         thing.each do |k, v|
388           return true if recursive_hash_search v, pattern
389         end
390       elsif thing.is_a? Array
391         thing.each do |k|
392           return true if recursive_hash_search k, pattern
393         end
394       elsif thing.is_a? String
395         return true if thing.match pattern
396       end
397       false
398     end
399
400     # Fail validation if any script_parameters field includes a string containing a
401     # collection uuid pattern.
402     if self.script_parameters_changed?
403       if recursive_hash_search(self.script_parameters, Collection.uuid_regex)
404         self.errors.add :script_parameters, "must use portable_data_hash instead of collection uuid"
405         return false
406       end
407     end
408     true
409   end
410 end