a831dd68b7e720bd1ef9d8959badcd79c574f760
[arvados.git] / services / api / app / models / job.rb
1 class Job < ArvadosModel
2   include HasUuid
3   include KindAndEtag
4   include CommonApiTemplate
5   attr_protected :docker_image_locator
6   serialize :script_parameters, Hash
7   serialize :runtime_constraints, Hash
8   serialize :tasks_summary, Hash
9   before_create :ensure_unique_submit_id
10   after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
11   before_validation :set_priority
12   validate :ensure_script_version_is_commit
13   validate :find_docker_image_locator
14   before_validation :verify_status
15   before_create :set_state_before_save
16   before_save :set_state_before_save
17
18   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
19
20   class SubmitIdReused < StandardError
21   end
22
23   api_accessible :user, extend: :common do |t|
24     t.add :submit_id
25     t.add :priority
26     t.add :script
27     t.add :script_parameters
28     t.add :script_version
29     t.add :cancelled_at
30     t.add :cancelled_by_client_uuid
31     t.add :cancelled_by_user_uuid
32     t.add :started_at
33     t.add :finished_at
34     t.add :output
35     t.add :success
36     t.add :running
37     t.add :state
38     t.add :is_locked_by_uuid
39     t.add :log
40     t.add :runtime_constraints
41     t.add :tasks_summary
42     t.add :dependencies
43     t.add :nondeterministic
44     t.add :repository
45     t.add :supplied_script_version
46     t.add :docker_image_locator
47     t.add :queue_position
48     t.add :description
49   end
50
51   # Supported states for a job
52   States = [
53             (Queued = 'Queued'),
54             (Running = 'Running'),
55             (Cancelled = 'Cancelled'),
56             (Failed = 'Failed'),
57             (Complete = 'Complete'),
58            ]
59
60   def assert_finished
61     update_attributes(finished_at: finished_at || Time.now,
62                       success: success.nil? ? false : success,
63                       running: false)
64   end
65
66   def self.queue
67     self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ? and success is ?',
68                nil, nil, nil, nil).
69       order('priority desc, created_at')
70   end
71
72   def queue_position
73     i = 0
74     Job::queue.each do |j|
75       if j[:uuid] == self.uuid
76         return i
77       end
78     end
79     nil
80   end
81
82   def self.running
83     self.where('running = ?', true).
84       order('priority desc, created_at')
85   end
86
87   protected
88
89   def foreign_key_attributes
90     super + %w(output log)
91   end
92
93   def skip_uuid_read_permission_check
94     super + %w(cancelled_by_client_uuid)
95   end
96
97   def skip_uuid_existence_check
98     super + %w(output log)
99   end
100
101   def set_priority
102     if self.priority.nil?
103       self.priority = 0
104     end
105     true
106   end
107
108   def ensure_script_version_is_commit
109     if self.is_locked_by_uuid and self.started_at
110       # Apparently client has already decided to go for it. This is
111       # needed to run a local job using a local working directory
112       # instead of a commit-ish.
113       return true
114     end
115     if new_record? or script_version_changed?
116       sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
117       if sha1
118         self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
119         self.script_version = sha1
120       else
121         self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
122         return false
123       end
124     end
125   end
126
127   def ensure_unique_submit_id
128     if !submit_id.nil?
129       if Job.where('submit_id=?',self.submit_id).first
130         raise SubmitIdReused.new
131       end
132     end
133     true
134   end
135
136   def find_docker_image_locator
137     # Find the Collection that holds the Docker image specified in the
138     # runtime constraints, and store its locator in docker_image_locator.
139     unless runtime_constraints.is_a? Hash
140       # We're still in validation stage, so we can't assume
141       # runtime_constraints isn't something horrible like an array or
142       # a string. Treat those cases as "no docker image supplied";
143       # other validations will fail anyway.
144       self.docker_image_locator = nil
145       return true
146     end
147     image_search = runtime_constraints['docker_image']
148     image_tag = runtime_constraints['docker_image_tag']
149     if image_search.nil?
150       self.docker_image_locator = nil
151       true
152     elsif coll = Collection.for_latest_docker_image(image_search, image_tag)
153       self.docker_image_locator = coll.portable_data_hash
154       true
155     else
156       errors.add(:docker_image_locator, "not found for #{image_search}")
157       false
158     end
159   end
160
161   def dependencies
162     deps = {}
163     queue = self.script_parameters.values
164     while not queue.empty?
165       queue = queue.flatten.compact.collect do |v|
166         if v.is_a? Hash
167           v.values
168         elsif v.is_a? String
169           v.match(/^(([0-9a-f]{32})\b(\+[^,]+)?,?)*$/) do |locator|
170             deps[locator.to_s] = true
171           end
172           nil
173         end
174       end
175     end
176     deps.keys
177   end
178
179   def permission_to_update
180     if is_locked_by_uuid_was and !(current_user and
181                                    (current_user.uuid == is_locked_by_uuid_was or
182                                     current_user.uuid == system_user.uuid))
183       if script_changed? or
184           script_parameters_changed? or
185           script_version_changed? or
186           (!cancelled_at_was.nil? and
187            (cancelled_by_client_uuid_changed? or
188             cancelled_by_user_uuid_changed? or
189             cancelled_at_changed?)) or
190           started_at_changed? or
191           finished_at_changed? or
192           running_changed? or
193           success_changed? or
194           output_changed? or
195           log_changed? or
196           tasks_summary_changed?
197         logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
198         return false
199       end
200     end
201     if !is_locked_by_uuid_changed?
202       super
203     else
204       if !current_user
205         logger.warn "Anonymous user tried to change lock on #{self.class.to_s} #{uuid_was}"
206         false
207       elsif is_locked_by_uuid_was and is_locked_by_uuid_was != current_user.uuid
208         logger.warn "User #{current_user.uuid} tried to steal lock on #{self.class.to_s} #{uuid_was} from #{is_locked_by_uuid_was}"
209         false
210       elsif !is_locked_by_uuid.nil? and is_locked_by_uuid != current_user.uuid
211         logger.warn "User #{current_user.uuid} tried to lock #{self.class.to_s} #{uuid_was} with uuid #{is_locked_by_uuid}"
212         false
213       else
214         super
215       end
216     end
217   end
218
219   def update_modified_by_fields
220     if self.cancelled_at_changed?
221       # Ensure cancelled_at cannot be set to arbitrary non-now times,
222       # or changed once it is set.
223       if self.cancelled_at and not self.cancelled_at_was
224         self.cancelled_at = Time.now
225         self.cancelled_by_user_uuid = current_user.uuid
226         self.cancelled_by_client_uuid = current_api_client.andand.uuid
227         @need_crunch_dispatch_trigger = true
228       else
229         self.cancelled_at = self.cancelled_at_was
230         self.cancelled_by_user_uuid = self.cancelled_by_user_uuid_was
231         self.cancelled_by_client_uuid = self.cancelled_by_client_uuid_was
232       end
233     end
234     super
235   end
236
237   def trigger_crunch_dispatch_if_cancelled
238     if @need_crunch_dispatch_trigger
239       File.open(Rails.configuration.crunch_refresh_trigger, 'wb') do
240         # That's all, just create/touch a file for crunch-job to see.
241       end
242     end
243   end
244
245   def verify_status
246     changed_attributes = self.changed
247
248     if new_record?
249       self.state = Queued
250     elsif 'state'.in? changed_attributes
251       case self.state
252       when Queued
253         self.running = false
254         self.success = nil
255       when Running
256         if !self.started_at
257           self.started_at = Time.now
258         end
259         self.running = true
260         self.success = nil
261       when Cancelled
262         if !self.cancelled_at
263           self.cancelled_at = Time.now
264         end
265         self.running = false
266         self.success = false
267       when Failed
268         if !self.finished_at
269           self.finished_at = Time.now
270         end
271         self.running = false
272         self.success = false
273       when Complete
274         if !self.finished_at
275           self.finished_at = Time.now
276         end
277         self.running = false
278         self.success = true
279       end
280     elsif 'running'.in? changed_attributes
281       self.state = Running
282     elsif 'success'.in? changed_attributes
283       if success
284         self.state = Complete
285       else
286         self.state = Failed
287       end
288     elsif 'cancelled_at'.in? changed_attributes
289       self.state = Cancelled
290     end
291   end
292
293   def set_state_before_save
294     if !self.state
295       if self.cancelled_at
296         self.state = Cancelled
297       elsif self.success
298         self.state = Complete
299       elsif (!self.success.nil? && !self.success)
300         self.state = Failed
301       elsif (self.running && self.success.nil? && !self.cencelled_at)
302         self.state = Running
303       elsif !self.started_at && !self.cancelled_at && !self.is_locked_by_uuid && self.success.nil?
304         self.state = Queued
305       end
306     end
307
308     if self.state.in?(States)
309       true
310     else
311       errors.add :state, "'#{state.inspect} must be one of: [#{States.join ', '}]"
312       false
313     end
314   end
315
316 end