1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
5 require 'whitelist_update'
6 require 'arvados/collection'
# ActiveRecord-style model for a container request. It subclasses ArvadosModel
# and mixes in ArvadosModelUpdates, CommonApiTemplate and WhitelistUpdate.
8 class ContainerRequest < ArvadosModel
9 include ArvadosModelUpdates
12 include CommonApiTemplate
13 include WhitelistUpdate
# The container associated with this request, joined on
# container_uuid (this table) -> uuid (containers).
15 belongs_to :container, foreign_key: :container_uuid, primary_key: :uuid
# The Container referenced by requesting_container_uuid. The remaining
# options of this hash are not visible in this excerpt.
16 belongs_to :requesting_container, {
17 class_name: 'Container',
18 foreign_key: :requesting_container_uuid,
22 # PostgreSQL JSONB columns should NOT be declared as serialized; Rails 5
23 # already knows how to treat them properly.
# Attributes declared with the :jsonbHash / :jsonbArray attribute types and
# explicit defaults.
24 attribute :properties, :jsonbHash, default: {}
25 attribute :secret_mounts, :jsonbHash, default: {}
# The default is a lambda, so Rails.configuration.DefaultStorageClasses is read
# when the default is evaluated rather than when this class is loaded.
26 attribute :output_storage_classes, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
# Columns stored via `serialize`, each with the class its value is expected to have.
28 serialize :environment, Hash
29 serialize :mounts, Hash
30 serialize :runtime_constraints, Hash
31 serialize :command, Array
32 serialize :scheduling_parameters, Hash
# Default-filling hooks: one after a record is loaded, and two before
# validation (fill_field_defaults only runs for new records).
34 after_find :fill_container_defaults_after_find
35 before_validation :fill_field_defaults, :if => :new_record?
36 before_validation :fill_container_defaults
# Built-in validations: required fields, output_ttl as an integer >= 0, and
# priority as an integer in 0..1000.
37 validates :command, :container_image, :output_path, :cwd, :presence => true
38 validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
39 validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 1000 }
# Custom validation methods defined on this model.
40 validate :validate_datatypes
41 validate :validate_runtime_constraints
42 validate :validate_scheduling_parameters
43 validate :validate_state_change
44 validate :check_update_whitelist
45 validate :secret_mounts_key_conflict
46 validate :validate_runtime_token
# Hooks that run once validation has finished.
47 after_validation :scrub_secrets
48 after_validation :set_preemptible
49 after_validation :set_container
# Persistence hooks around create, destroy and save.
50 before_create :set_requesting_container_uuid
51 before_destroy :set_priority_zero
52 after_save :update_priority
53 after_save :finalize_if_needed
# Fields added to the :user API template, which extends the :common template.
# Only part of the field list is visible in this excerpt.
55 api_accessible :user, extend: :common do |t|
57 t.add :container_count
58 t.add :container_count_max
59 t.add :container_image
75 t.add :requesting_container_uuid
76 t.add :runtime_constraints
77 t.add :scheduling_parameters
80 t.add :output_storage_classes
83 # Supported states for a container request
# Each entry assigns a state constant inline, so the constant and its string
# value are defined in one place. The enclosing literal's opening and closing
# lines are not visible in this excerpt.
86 (Uncommitted = 'Uncommitted'),
87 (Committed = 'Committed'),
# Entries of a state => [states] table; presumably current state => states it
# may move to, with nil standing for "no state yet" -- confirm against the
# elided surrounding lines.
92 nil => [Uncommitted, Committed],
93 Uncommitted => [Committed],
# Attribute names used by check_update_whitelist. AttrsPermittedAlways seeds
# the permitted list for every update.
97 AttrsPermittedAlways = [:owner_uuid, :state, :name, :description, :properties]
# Appended to the permitted list when the record is new or its previous state
# was Uncommitted.
98 AttrsPermittedBeforeCommit = [:command, :container_count_max,
99 :container_image, :cwd, :environment, :filters, :mounts,
100 :output_path, :priority, :runtime_token,
101 :runtime_constraints, :state, :container_uuid, :use_existing,
102 :scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
103 :output_storage_classes]
# Returns whether any entry of Rails.configuration.InstanceTypes satisfies the
# block. The block body is not visible in this excerpt.
# NOTE(review): presumably it tests each instance type's preemptible flag -- confirm.
105 def self.any_preemptible_instances?
106 Rails.configuration.InstanceTypes.any? do |k, v|
# Class-level hook; its body is not visible in this excerpt.
# NOTE(review): presumably returns column names to restrict when reading
# index (list) responses -- confirm against the full file.
111 def self.limit_index_columns_read
# Returns the superclass's logged attributes with the 'secret_mounts' and
# 'runtime_token' entries removed.
def logged_attributes
  hidden_keys = ['secret_mounts', 'runtime_token']
  super.except(*hidden_keys)
end
# Accessor whose body is not visible in this excerpt.
# NOTE(review): presumably returns the state transition table defined above -- confirm.
119 def state_transitions
# Declares what this model skips in the uuid read-permission check. The
# method's return value is not visible in this excerpt.
123 def skip_uuid_read_permission_check
124 # The uuid_read_permission_check prevents users from making
125 # references to objects they can't view. However, in this case we
126 # don't want to do that check since there's a circular dependency
127 # where user can't view the container until the user has
128 # constructed the container request that references the container.
# after_save hook: moves a Committed request to its final state once its
# container is final, or immediately when the branch below treats the
# container as cancelled. Does nothing unless the request is Committed.
# Several short lines (loop, locking, closing `end`s) are not visible in
# this excerpt.
132 def finalize_if_needed
133 return if state != Committed
135 # get container lock first, then lock current container request
136 # (same order as Container#handle_completed). Locking always
137 # reloads the Container and ContainerRequest records.
138 c = Container.find_by_uuid(container_uuid)
# Race check: the request may now point at a different container than the
# one just loaded.
142 if !c.nil? && container_uuid != c.uuid
143 # After locking, we've noticed a race, the container_uuid is
144 # different than the container record we just loaded. This
145 # can happen if Container#handle_completed scheduled a new
146 # container for retry and set container_uuid while we were
147 # waiting on the container lock. Restart the loop and get the
# Container reached a final state: the work below runs as the system user
# and inside leave_modified_by_user_alone. The innermost call is not
# visible in this excerpt.
153 if state == Committed && c.final?
154 # The current container is
155 act_as_system_user do
156 leave_modified_by_user_alone do
161 elsif state == Committed
162 # Behave as if the container is cancelled
163 update_attributes!(state: Final)
169 # Finalize the container request after the container has
170 # finished/cancelled.
# NOTE(review): the `def` line of this method is not visible in this excerpt;
# presumably this is the finalization routine invoked from
# finalize_if_needed -- confirm.
172 container = Container.find_by_uuid(container_uuid)
# Refresh this request's log/output collections from the container.
174 update_collections(container: container)
# For a Complete container, additionally save a standalone copy of the
# container's log collection owned by this request's owner.
176 if container.state == Container::Complete
177 log_col = Collection.where(portable_data_hash: container.log).first
179 # Need to save collection
180 completed_coll = Collection.new(
181 owner_uuid: self.owner_uuid,
182 name: "Container log for container #{container_uuid}",
185 'container_request' => self.uuid,
186 'container_uuid' => container_uuid,
188 portable_data_hash: log_col.portable_data_hash,
189 manifest_text: log_col.manifest_text,
190 storage_classes_desired: self.output_storage_classes
192 completed_coll.save_with_unique_name!
# Mark the request itself as Final.
196 update_attributes!(state: Final)
# Creates or updates this request's collections from a container's results.
#
# container:   object responding to the names in `collections` (e.g. #log,
#              #output), each returning a portable data hash, and to #uuid.
# collections: which result types to process; defaults to both 'log' and 'output'.
#
# For each type, the resulting collection's uuid is stored on this request via
# the "<type>_uuid=" writer. Several short lines (guards, closing `end`s, some
# hash entries) are not visible in this excerpt.
199 def update_collections(container:, collections: ['log', 'output'])
200 collections.each do |out_type|
# Look up the container's result collection by portable data hash.
201 pdh = container.send(out_type)
203 c = Collection.where(portable_data_hash: pdh).first
205 manifest = c.manifest_text
# Default name; an output collection may be renamed and given a trash time below.
207 coll_name = "Container #{out_type} for request #{uuid}"
209 if out_type == 'output'
210 if self.output_name and self.output_name != ""
211 coll_name = self.output_name
213 if self.output_ttl > 0
214 trash_at = db_current_time + self.output_ttl
# Reuse the collection this request already references for this type, if any.
218 coll_uuid = self.send(out_type + '_uuid')
219 coll = coll_uuid.nil? ? nil : Collection.where(uuid: coll_uuid).first
221 coll = Collection.new(
222 owner_uuid: self.owner_uuid,
225 storage_classes_desired: self.output_storage_classes,
228 'container_request' => uuid,
233 # Copy the log into a merged collection
# The source tree is copied twice: once to the top level of the destination
# and once under a per-container "log for container <uuid>" directory.
234 src = Arv::Collection.new(manifest)
235 dst = Arv::Collection.new(coll.manifest_text)
236 dst.cp_r("./", ".", src)
237 dst.cp_r("./", "log for container #{container.uuid}", src)
238 manifest = dst.manifest_text
# The portable data hash is recomputed as MD5(manifest) + '+' + byte size.
241 coll.assign_attributes(
242 portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
243 manifest_text: manifest,
246 coll.save_with_unique_name!
247 self.send(out_type + '_uuid=', coll.uuid)
# Returns the superclass's full-text searchable columns with the mount,
# secret, token and storage-class columns filtered out.
def self.full_text_searchable_columns
  unsearchable = ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token", "output_storage_classes"]
  super.reject { |column| unsearchable.include?(column) }
end
# before_validation hook for new records: assigns a default to each field
# below only when it is currently nil/false (`||=`). Some default
# assignments are not visible in this excerpt.
257 def fill_field_defaults
258 self.state ||= Uncommitted
259 self.environment ||= {}
260 self.runtime_constraints ||= {}
262 self.secret_mounts ||= {}
# Retry limit defaults to the configured Containers.MaxRetryAttempts.
264 self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts
265 self.scheduling_parameters ||= {}
266 self.output_ttl ||= 0
# Body of the container-assignment hook. Its `def` line and several short
# lines (early returns, loop control, closing `end`s) are not visible here.
# NOTE(review): presumably this is set_container, registered as an
# after_validation hook and referenced from check_update_whitelist -- confirm.
#
# Users who are not admins may only change container_uuid to nil.
271 if (container_uuid_changed? and
272 not current_user.andand.is_admin and
273 not container_uuid.nil?)
274 errors.add :container_uuid, "can only be updated to nil."
# container_count is maintained by this method and may not be set by callers.
277 if self.container_count_changed?
278 errors.add :container_count, "cannot be updated directly."
# On a transition to Committed without a container, resolve one for this request.
281 if state_changed? and state == Committed and container_uuid.nil?
283 c = Container.resolve(self)
285 if c.state == Container::Cancelled
286 # Lost a race, we have a lock on the container but the
287 # container was cancelled in a different request, restart
288 # the loop and resolve request to a new container.
291 self.container_uuid = c.uuid
# The request now points at a different container: count it and, when there
# was a previous container with a log collection, preserve that log.
295 if self.container_uuid != self.container_uuid_was
296 self.container_count += 1
297 return if self.container_uuid_was.nil?
299 old_container = Container.find_by_uuid(self.container_uuid_was)
300 return if old_container.nil?
302 old_logs = Collection.where(portable_data_hash: old_container.log).first
303 return if old_logs.nil?
# NOTE(review): log_coll is looked up by log_uuid, but the branch below tests
# log_uuid.nil? rather than log_coll.nil?. If log_uuid names a collection that
# can no longer be found, log_coll stays nil and log_coll.manifest_text further
# down would raise -- confirm against the elided lines.
305 log_coll = self.log_uuid.nil? ? nil : Collection.where(uuid: self.log_uuid).first
306 if self.log_uuid.nil?
307 log_coll = Collection.new(
308 owner_uuid: self.owner_uuid,
# NOTE(review): the local `coll_name` assigned on the next line is not
# referenced anywhere else in the visible lines.
309 name: coll_name = "Container log for request #{uuid}",
311 storage_classes_desired: self.output_storage_classes)
314 # copy logs from old container into CR's log collection
315 src = Arv::Collection.new(old_logs.manifest_text)
316 dst = Arv::Collection.new(log_coll.manifest_text)
317 dst.cp_r("./", "log for container #{old_container.uuid}", src)
318 manifest = dst.manifest_text
# The portable data hash is recomputed as MD5(manifest) + '+' + byte size.
320 log_coll.assign_attributes(
321 portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
322 manifest_text: manifest)
323 log_coll.save_with_unique_name!
324 self.log_uuid = log_coll.uuid
# Body of a hook whose `def` line is not visible in this excerpt.
# NOTE(review): presumably set_preemptible, registered as an after_validation
# hook -- confirm.
#
# Forces scheduling_parameters['preemptible'] to true when all of these hold:
# the record is new or its state changed, the state is Committed, the
# AlwaysUsePreemptibleInstances setting is on, a requesting container uuid is
# available, and at least one preemptible instance type exists.
329 if (new_record? || state_changed?) &&
330 state == Committed &&
331 Rails.configuration.Containers.AlwaysUsePreemptibleInstances &&
332 get_requesting_container_uuid() &&
333 self.class.any_preemptible_instances?
334 self.scheduling_parameters['preemptible'] = true
# Validates runtime_constraints, adding an error on :runtime_constraints for
# each offending entry:
#   - 'vcpus' and 'ram' must be positive Integers;
#   - when a 'cuda' section is present, 'device_count' must be an Integer >= 0
#     and 'driver_version' / 'hardware_capability' must be Strings.
# Lines between the `def` and the first check are not visible in this excerpt,
# so any state gating applied there is not documented here.
338 def validate_runtime_constraints
341 ['vcpus', 'ram'].each do |k|
342 v = runtime_constraints[k]
343 if !v.is_a?(Integer) || v <= 0
344 errors.add(:runtime_constraints,
345 "[#{k}]=#{v.inspect} must be a positive integer")
348 if runtime_constraints['cuda']
349 ['device_count'].each do |k|
350 v = runtime_constraints['cuda'][k]
351 if !v.is_a?(Integer) || v < 0
352 errors.add(:runtime_constraints,
353 "[cuda.#{k}]=#{v.inspect} must be a positive or zero integer")
356 ['driver_version', 'hardware_capability'].each do |k|
357 v = runtime_constraints['cuda'][k]
# With devices requested (device_count > 0), a version string whose to_f is
# 0.0 is rejected as well.
# NOTE(review): device_count is compared with `> 0` here even if it failed the
# Integer check above; a nil device_count with a String version would raise
# instead of adding a validation error -- confirm.
358 if !v.is_a?(String) || (runtime_constraints['cuda']['device_count'] > 0 && v.to_f == 0.0)
359 errors.add(:runtime_constraints,
360 "[cuda.#{k}]=#{v.inspect} must be a string in format 'X.Y'")
# Validates the shapes of command, environment, mounts and secret_mounts,
# adding a message to `errors` for every entry of the wrong type:
#   - command: every entry must be a String;
#   - environment: every key and value must be a String;
#   - mounts / secret_mounts: every key must be a String and every value a
#     Hash with a 'kind' field; known fields are then type-checked one by one.
def validate_datatypes
  command.each do |c|
    if !c.is_a?(String)
      errors.add(:command, "must be an array of strings but has entry #{c.class}")
    end
  end
  environment.each do |k, v|
    if !k.is_a?(String) || !v.is_a?(String)
      errors.add(:environment, "must be an map of String to String but has entry #{k.class} to #{v.class}")
    end
  end
  [:mounts, :secret_mounts].each do |m|
    self[m].each do |k, v|
      if !k.is_a?(String) || !v.is_a?(Hash)
        errors.add(m, "must be an map of String to Hash but is has entry #{k.class} to #{v.class}")
        # A non-Hash value cannot be inspected field by field (indexing an
        # Integer, Array or nil with a String raises), so stop here for it.
        next unless v.is_a?(Hash)
      end
      if v["kind"].nil?
        errors.add(m, "each item must have a 'kind' field")
      end
      [[String, ["kind", "portable_data_hash", "uuid", "device_type",
                 "path", "commit", "repository_name", "git_url"]],
       [Integer, ["capacity"]]].each do |t, fields|
        fields.each do |f|
          if !v[f].nil? && !v[f].is_a?(t)
            errors.add(m, "#{k}: #{f} must be a #{t} but is #{v[f].class}")
          end
        end
      end
      ["writable", "exclude_from_output"].each do |f|
        # Ruby has no Boolean class, so accept exactly true or false. The
        # message spells the expected type out literally because the type
        # variable of the loop above is not in scope here.
        if !v[f].nil? && ![true, false].include?(v[f])
          errors.add(m, "#{k}: #{f} must be a boolean but is #{v[f].class}")
        end
      end
    end
  end
end
# Validates scheduling_parameters once the request is Committed, adding an
# error on :scheduling_parameters when:
#   - 'partitions' is present but is not an Array made up only of Strings;
#   - 'preemptible' is requested on a new or state-changed record while no
#     preemptible instance type is configured;
#   - 'max_run_time' is present but is not an Integer >= 0.
def validate_scheduling_parameters
  return unless state == Committed

  params = scheduling_parameters

  if params.include?('partitions')
    partitions = params['partitions']
    all_strings = partitions.is_a?(Array) && partitions.all? { |entry| entry.is_a?(String) }
    unless all_strings
      errors.add :scheduling_parameters, "partitions must be an array of strings"
    end
  end

  preemptible_requested = params['preemptible'] && (new_record? || state_changed?)
  if preemptible_requested && !self.class.any_preemptible_instances?
    errors.add :scheduling_parameters, "preemptible instances are not configured in InstanceTypes"
  end

  if params.include?('max_run_time')
    limit = params['max_run_time']
    unless limit.is_a?(Integer) && limit >= 0
      errors.add :scheduling_parameters, "max_run_time must be positive integer"
    end
  end
end
# Builds the list of attributes that may change in the current update,
# starting from AttrsPermittedAlways and widening it according to the
# record's previous state and the current user. The branching structure
# between the visible fragments (and the final use of `permitted`) is not
# visible in this excerpt.
425 def check_update_whitelist
426 permitted = AttrsPermittedAlways.dup
428 if self.new_record? || self.state_was == Uncommitted
429 # Allow create-and-commit in a single operation.
430 permitted.push(*AttrsPermittedBeforeCommit)
# Only considered when the set of mount paths is unchanged.
431 elsif mounts_changed? && mounts_was.keys.sort == mounts.keys.sort
432 # Ignore the updated mounts if the only changes are default/zero
433 # values as added by controller, see 17774
# A changed key/value pair whose value is not one of 0, "", false or nil
# counts as a real change.
435 mounts.each do |path, mount|
436 (mount.to_a - mounts_was[path].to_a).each do |k, v|
437 if ![0, "", false, nil].index(v)
438 only_defaults = false
# Drop the pending "mounts" change so it is not treated as an update.
443 clear_attribute_change("mounts")
449 permitted.push :priority, :container_count_max, :container_uuid
451 if self.priority.nil?
452 self.errors.add :priority, "cannot be nil"
455 # Allow container count to increment (not by client, only by us
456 # -- see set_container)
457 permitted.push :container_count
# Admins may additionally change log_uuid here.
459 if current_user.andand.is_admin
460 permitted.push :log_uuid
464 if self.state_was == Committed
465 # "Cancel" means setting priority=0, state=Committed
466 permitted.push :priority
# Admins may additionally change output_uuid and log_uuid here.
468 if current_user.andand.is_admin
469 permitted.push :output_uuid, :log_uuid
# Validation: a path may not appear in both secret_mounts and mounts. Adds an
# error on :secret_mounts when a secret mount key is also a key of mounts.
# One short statement after the error line is not visible in this excerpt.
478 def secret_mounts_key_conflict
479 secret_mounts.each do |k, v|
480 if mounts.has_key?(k)
481 errors.add(:secret_mounts, 'conflict with non-secret mounts')
# Validates a newly assigned runtime_token. Nothing is checked when the token
# is nil or unchanged. Otherwise the token must start with "v2/" and must be
# accepted by ApiClientAuthorization.validate; each failure adds an error on
# :runtime_token.
def validate_runtime_token
  return if runtime_token.nil? || !runtime_token_changed?

  # Test the prefix directly. Writing `!token[0..2] == "v2/"` would negate the
  # substring before comparing it, so the check could never fire.
  unless runtime_token.start_with?("v2/")
    errors.add :runtime_token, "not a v2 token"
    return
  end

  if ApiClientAuthorization.validate(token: runtime_token).nil?
    errors.add :runtime_token, "failed validation"
  end
end
# Body of a hook whose `def` line is not visible in this excerpt.
# NOTE(review): presumably scrub_secrets, registered as an after_validation
# hook -- confirm.
# Once the request is Final, the secret mounts and the runtime token are cleared.
500 if self.state == Final
501 self.secret_mounts = {}
502 self.runtime_token = nil
# Body of a hook whose `def` line is not visible in this excerpt.
# NOTE(review): presumably update_priority, registered as an after_save
# hook -- confirm.
# Runs only when the last save changed state, priority or container_uuid.
# As the system user, it calls update_priority! on the records matching the
# previous and current container uuids (nil values dropped). The receiver of
# `where` is on a line not visible here.
507 return unless saved_change_to_state? || saved_change_to_priority? || saved_change_to_container_uuid?
508 act_as_system_user do
510 where('uuid in (?)', [container_uuid_before_last_save, self.container_uuid].compact).
511 map(&:update_priority!)
# before_destroy hook: drops the request's priority to zero unless the request
# is already Final.
def set_priority_zero
  return if state == Final

  update_attributes!(priority: 0)
end
# before_create hook: records which container is making this request and, when
# there is one, sets this request's priority to the highest priority among the
# container requests assigned to that container (0 when there are none).
def set_requesting_container_uuid
  parent_uuid = get_requesting_container_uuid()
  self.requesting_container_uuid = parent_uuid
  return unless parent_uuid

  # Determine the priority of container request for the requesting
  # container.
  siblings = ContainerRequest.where(container_uuid: self.requesting_container_uuid)
  self.priority = siblings.maximum("priority") || 0
end
# Returns the uuid of the container making this request: the stored
# requesting_container_uuid when set, otherwise the uuid of
# Container.for_current_token, or nil when that lookup yields nothing
# (`andand` guards the nil case).
527 def get_requesting_container_uuid
528 return self.requesting_container_uuid || Container.for_current_token.andand.uuid