1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
5 require 'whitelist_update'
6 require 'arvados/collection'
8 class ContainerRequest < ArvadosModel
9 include ArvadosModelUpdates
12 include CommonApiTemplate
13 include WhitelistUpdate
15 belongs_to :container,
16 foreign_key: 'container_uuid',
19 belongs_to :requesting_container,
20 class_name: 'Container',
21 foreign_key: 'requesting_container_uuid',
25 # PostgreSQL JSONB columns should NOT be declared as serialized; Rails 5
26 # already knows how to treat them properly.
27 attribute :properties, :jsonbHash, default: {}
28 attribute :secret_mounts, :jsonbHash, default: {}
29 attribute :output_storage_classes, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
30 attribute :output_properties, :jsonbHash, default: {}
32 serialize :environment, Hash
33 serialize :mounts, Hash
34 serialize :runtime_constraints, Hash
35 serialize :command, Array
36 serialize :scheduling_parameters, Hash
37 serialize :output_glob, Array
39 after_find :fill_container_defaults_after_find
40 after_initialize { @state_was_when_initialized = self.state_was } # see finalize_if_needed
41 before_validation :fill_field_defaults, :if => :new_record?
42 before_validation :fill_container_defaults
43 validates :command, :container_image, :output_path, :cwd, :presence => true
44 validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
45 validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 1000 }
46 validate :validate_datatypes
47 validate :validate_runtime_constraints
48 validate :validate_scheduling_parameters
49 validate :validate_state_change
50 validate :check_update_whitelist
51 validate :secret_mounts_key_conflict
52 validate :validate_runtime_token
53 after_validation :scrub_secrets
54 after_validation :set_preemptible
55 after_validation :set_container
56 before_create :set_requesting_container_uuid
57 before_destroy :set_priority_zero
58 after_save :update_priority
59 after_save :finalize_if_needed
61 api_accessible :user, extend: :common do |t|
63 t.add :container_count
64 t.add :container_count_max
65 t.add :container_image
82 t.add :requesting_container_uuid
83 t.add :runtime_constraints
84 t.add :scheduling_parameters
87 t.add :output_storage_classes
88 t.add :output_properties
89 t.add :cumulative_cost
92 # Supported states for a container request
95 (Uncommitted = 'Uncommitted'),
96 (Committed = 'Committed'),
100 State_transitions = {
101 nil => [Uncommitted, Committed],
102 Uncommitted => [Committed],
106 AttrsPermittedAlways = [:owner_uuid, :state, :name, :description, :properties]
107 AttrsPermittedBeforeCommit = [:command, :container_count_max,
108 :container_image, :cwd, :environment, :filters, :mounts,
109 :output_path, :output_glob, :priority, :runtime_token,
110 :runtime_constraints, :state, :container_uuid, :use_existing,
111 :scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
112 :output_storage_classes, :output_properties]
114 def self.any_preemptible_instances?
115 Rails.configuration.InstanceTypes.any? do |k, v|
120 def self.limit_index_columns_read
# Attributes recorded for this record, as computed by the superclass,
# minus the fields that carry secrets (secret_mounts and runtime_token).
def logged_attributes
  redacted = %w[secret_mounts runtime_token]
  super.except(*redacted)
end
128 def state_transitions
132 def skip_uuid_read_permission_check
133 # The uuid_read_permission_check prevents users from making
134 # references to objects they can't view. However, in this case we
135 # don't want to do that check since there's a circular dependency
136 # where user can't view the container until the user has
137 # constructed the container request that references the container.
141 def finalize_if_needed
142 return if state != Committed
144 # get container lock first, then lock current container request
145 # (same order as Container#handle_completed). Locking always
146 # reloads the Container and ContainerRequest records.
147 c = Container.find_by_uuid(container_uuid)
151 if !c.nil? && container_uuid != c.uuid
152 # After locking, we've noticed a race, the container_uuid is
153 # different than the container record we just loaded. This
154 # can happen if Container#handle_completed scheduled a new
155 # container for retry and set container_uuid while we were
156 # waiting on the container lock. Restart the loop and get the
162 if state == Committed && c.final?
163 # The current container is
164 act_as_system_user do
165 leave_modified_by_user_alone do
170 elsif state == Committed
171 # Behave as if the container is cancelled
172 update!(state: Final)
178 # Finalize the container request after the container has
179 # finished/cancelled.
181 container = Container.find_by_uuid(container_uuid)
183 # We don't want to add the container cost if the container was
184 # already finished when this CR was committed. But we are
185 # running in an after_save hook after a lock/reload, so
186 # state_was has already been updated to Committed regardless.
187 # Hence the need for @state_was_when_initialized.
188 if @state_was_when_initialized == Committed
189 # Add the final container cost to our cumulative cost (which
190 # may already be non-zero from previous attempts if
191 # container_count_max > 1).
192 self.cumulative_cost += container.cost + container.subrequests_cost
195 # Add our cumulative cost to the subrequests_cost of the
196 # requesting container, if any.
197 if self.requesting_container_uuid
199 uuid: self.requesting_container_uuid,
200 state: Container::Running,
202 c.subrequests_cost += self.cumulative_cost
207 update_collections(container: container)
208 # update_collections makes a log collection that includes all of the logs
209 # for all of the containers associated with this request. For requests
210 # that are retried, this is the primary way users can get logs for
212 # The code below makes a log collection that is a verbatim copy of the
213 # container's logs. This is required for container reuse: a container
214 # will not be reused if the owner cannot read a collection with its logs.
215 # See the "readable log" section of Container.find_reusable().
216 if container.state == Container::Complete
217 log_col = Collection.where(portable_data_hash: container.log).first
219 # Need to save collection
220 completed_coll = Collection.new(
221 owner_uuid: self.owner_uuid,
222 name: "Container log for container #{container_uuid}",
225 'container_request' => self.uuid,
226 'container_uuid' => container_uuid,
228 portable_data_hash: log_col.portable_data_hash,
229 manifest_text: log_col.manifest_text,
230 storage_classes_desired: self.output_storage_classes
232 completed_coll.save_with_unique_name!
236 update!(state: Final)
239 def update_collections(container:, collections: ['log', 'output'])
241 # Check if parent is frozen or trashed, in which case it isn't
242 # valid to create new collections in the project, so return
243 # without creating anything.
244 owner = Group.find_by_uuid(self.owner_uuid)
245 return if owner && !owner.admin_change_permitted
247 collections.each do |out_type|
248 pdh = container.send(out_type)
250 c = Collection.where(portable_data_hash: pdh).first
252 manifest = c.manifest_text
254 coll_name = "Container #{out_type} for request #{uuid}"
256 if out_type == 'output'
257 if self.output_name and self.output_name != ""
258 coll_name = self.output_name
260 if self.output_ttl > 0
261 trash_at = db_current_time + self.output_ttl
265 coll_uuid = self.send(out_type + '_uuid')
266 coll = coll_uuid.nil? ? nil : Collection.where(uuid: coll_uuid).first
268 coll = Collection.new(
269 owner_uuid: self.owner_uuid,
272 storage_classes_desired: self.output_storage_classes)
276 # Copy the log into a merged collection
277 src = Arv::Collection.new(manifest)
278 dst = Arv::Collection.new(coll.manifest_text)
279 dst.cp_r("./", ".", src)
280 dst.cp_r("./", "log for container #{container.uuid}", src)
281 manifest = dst.manifest_text
284 merged_properties = {}
285 merged_properties['container_request'] = uuid
287 if out_type == 'output' and !requesting_container_uuid.nil?
288 # output of a child process, give it "intermediate" type by
290 merged_properties['type'] = 'intermediate'
292 merged_properties['type'] = out_type
295 if out_type == "output"
296 merged_properties.update(container.output_properties)
297 merged_properties.update(self.output_properties)
300 coll.assign_attributes(
301 portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
302 manifest_text: manifest,
305 properties: merged_properties)
306 coll.save_with_unique_name!
307 self.send(out_type + '_uuid=', coll.uuid)
# The superclass's full-text-searchable columns with the columns listed
# below removed.
def self.full_text_searchable_columns
  unsearchable = %w[
    mounts
    secret_mounts
    secret_mounts_md5
    runtime_token
    output_storage_classes
    output_glob
  ]
  super - unsearchable
end
# Registered as a before_destroy callback: drops the priority to zero
# unless it is already zero (or lower) or the request is already Final.
def set_priority_zero
  return unless self.priority > 0
  return if self.state == Final

  self.update!(priority: 0)
end
321 def fill_field_defaults
322 self.state ||= Uncommitted
323 self.environment ||= {}
324 self.runtime_constraints ||= {}
326 self.secret_mounts ||= {}
328 self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts
329 self.scheduling_parameters ||= {}
330 self.output_ttl ||= 0
331 self.output_glob ||= []
336 if (container_uuid_changed? and
337 not current_user.andand.is_admin and
338 not container_uuid.nil?)
339 errors.add :container_uuid, "can only be updated to nil."
342 if self.container_count_changed?
343 errors.add :container_count, "cannot be updated directly."
346 if state_changed? and state == Committed and container_uuid.nil?
347 if self.command.length > 0 and self.command[0] == "arvados-cwl-runner"
348 # Special case, arvados-cwl-runner processes are always considered "supervisors"
349 self.scheduling_parameters['supervisor'] = true
352 c = Container.resolve(self)
354 if c.state == Container::Cancelled
355 # Lost a race, we have a lock on the container but the
356 # container was cancelled in a different request, restart
357 # the loop and resolve request to a new container.
360 self.container_uuid = c.uuid
364 if self.container_uuid != self.container_uuid_was
365 self.container_count += 1
366 return if self.container_uuid_was.nil?
368 old_container_uuid = self.container_uuid_was
369 old_container_log = Container.where(uuid: old_container_uuid).pluck(:log).first
370 return if old_container_log.nil?
372 old_logs = Collection.where(portable_data_hash: old_container_log).first
373 return if old_logs.nil?
375 log_coll = self.log_uuid.nil? ? nil : Collection.where(uuid: self.log_uuid).first
376 if self.log_uuid.nil?
377 log_coll = Collection.new(
378 owner_uuid: self.owner_uuid,
379 name: coll_name = "Container log for request #{uuid}",
381 storage_classes_desired: self.output_storage_classes)
384 # copy logs from old container into CR's log collection
385 src = Arv::Collection.new(old_logs.manifest_text)
386 dst = Arv::Collection.new(log_coll.manifest_text)
387 dst.cp_r("./", "log for container #{old_container_uuid}", src)
388 manifest = dst.manifest_text
390 log_coll.assign_attributes(
391 portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
392 manifest_text: manifest)
393 log_coll.save_with_unique_name!
394 self.log_uuid = log_coll.uuid
399 if (new_record? || state_changed?) &&
400 state == Committed &&
401 Rails.configuration.Containers.AlwaysUsePreemptibleInstances &&
402 get_requesting_container_uuid() &&
403 self.class.any_preemptible_instances?
404 self.scheduling_parameters['preemptible'] = true
408 def validate_runtime_constraints
411 ['vcpus', 'ram'].each do |k|
412 v = runtime_constraints[k]
413 if !v.is_a?(Integer) || v <= 0
414 errors.add(:runtime_constraints,
415 "[#{k}]=#{v.inspect} must be a positive integer")
418 if runtime_constraints['cuda']
419 ['device_count'].each do |k|
420 v = runtime_constraints['cuda'][k]
421 if !v.is_a?(Integer) || v < 0
422 errors.add(:runtime_constraints,
423 "[cuda.#{k}]=#{v.inspect} must be a positive or zero integer")
426 ['driver_version', 'hardware_capability'].each do |k|
427 v = runtime_constraints['cuda'][k]
428 if !v.is_a?(String) || (runtime_constraints['cuda']['device_count'] > 0 && v.to_f == 0.0)
429 errors.add(:runtime_constraints,
430 "[cuda.#{k}]=#{v.inspect} must be a string in format 'X.Y'")
# Validates the element types of the structured attributes, adding an
# error on the offending attribute for every violation found:
#
# * command and output_glob may only contain strings
# * environment must map strings to strings
# * mounts and secret_mounts must map strings to hashes; each hash needs
#   a 'kind' field, the well-known string/integer fields must have the
#   right type, and the flag fields must be true or false
def validate_datatypes
  command.each do |c|
    if !c.is_a?(String)
      errors.add(:command, "must be an array of strings but has entry #{c.class}")
    end
  end
  environment.each do |k, v|
    if !k.is_a?(String) || !v.is_a?(String)
      errors.add(:environment, "must be an map of String to String but has entry #{k.class} to #{v.class}")
    end
  end
  output_glob.each do |g|
    if !g.is_a?(String)
      errors.add(:output_glob, "must be an array of strings but has entry #{g.class}")
    end
  end
  [:mounts, :secret_mounts].each do |m|
    self[m].each do |k, v|
      if !k.is_a?(String) || !v.is_a?(Hash)
        errors.add(m, "must be an map of String to Hash but is has entry #{k.class} to #{v.class}")
      end
      # The per-field checks below index into the entry, which is only
      # meaningful (and only safe) when the entry is a Hash.
      next unless v.is_a?(Hash)
      if v["kind"].nil?
        errors.add(m, "each item must have a 'kind' field")
      end
      [[String, ["kind", "portable_data_hash", "uuid", "device_type",
                 "path", "commit", "repository_name", "git_url"]],
       [Integer, ["capacity"]]].each do |t, fields|
        fields.each do |f|
          if !v[f].nil? && !v[f].is_a?(t)
            errors.add(m, "#{k}: #{f} must be a #{t} but is #{v[f].class}")
          end
        end
      end
      ["writable", "exclude_from_output"].each do |f|
        if !v[f].nil? && ![true, false].include?(v[f])
          # The type name is spelled out here: the loop variable `t` of
          # the typed-field check above is not in scope in this block.
          errors.add(m, "#{k}: #{f} must be a boolean but is #{v[f].class}")
        end
      end
    end
  end
end
# Checks scheduling_parameters once the request is Committed:
#
# * 'partitions', when present and non-nil, must be an array of strings
# * 'preemptible' may only be requested, on a new record or a state
#   change, if any_preemptible_instances? reports true
# * 'max_run_time', when present, must be an integer that is not negative
def validate_scheduling_parameters
  return unless self.state == Committed

  sched = scheduling_parameters

  partitions = sched['partitions']
  if sched.include?('partitions') && !partitions.nil?
    all_strings = partitions.is_a?(Array) && partitions.all? { |p| p.is_a?(String) }
    unless all_strings
      errors.add :scheduling_parameters, "partitions must be an array of strings"
    end
  end

  if sched['preemptible'] && (new_record? || state_changed?)
    unless self.class.any_preemptible_instances?
      errors.add :scheduling_parameters, "preemptible instances are not configured in InstanceTypes"
    end
  end

  if sched.include?('max_run_time')
    max_run_time = sched['max_run_time']
    unless max_run_time.is_a?(Integer) && max_run_time >= 0
      errors.add :scheduling_parameters, "max_run_time must be positive integer"
    end
  end
end
501 def check_update_whitelist
502 permitted = AttrsPermittedAlways.dup
504 if self.new_record? || self.state_was == Uncommitted
505 # Allow create-and-commit in a single operation.
506 permitted.push(*AttrsPermittedBeforeCommit)
507 elsif mounts_changed? && mounts_was.keys.sort == mounts.keys.sort
508 # Ignore the updated mounts if the only changes are default/zero
509 # values as added by controller, see 17774
511 mounts.each do |path, mount|
512 (mount.to_a - mounts_was[path].to_a).each do |k, v|
513 if ![0, "", false, nil].index(v)
514 only_defaults = false
519 clear_attribute_change("mounts")
525 permitted.push :priority, :container_count_max, :container_uuid, :cumulative_cost
527 if self.priority.nil?
528 self.errors.add :priority, "cannot be nil"
531 # Allow container count to increment (not by client, only by us
532 # -- see set_container)
533 permitted.push :container_count
535 if current_user.andand.is_admin
536 permitted.push :log_uuid
540 if self.state_was == Committed
541 # "Cancel" means setting priority=0, state=Committed
542 permitted.push :priority, :cumulative_cost
544 if current_user.andand.is_admin
545 permitted.push :output_uuid, :log_uuid
554 def secret_mounts_key_conflict
555 secret_mounts.each do |k, v|
556 if mounts.has_key?(k)
557 errors.add(:secret_mounts, 'conflict with non-secret mounts')
# Validates a runtime_token that has just been set or changed.
#
# Nothing is checked when the token is nil or unchanged. Otherwise the
# token must start with "v2/" and must be accepted by
# ApiClientAuthorization.validate; the first failing check adds an
# error on :runtime_token and ends validation of the token.
def validate_runtime_token
  return if self.runtime_token.nil? || !self.runtime_token_changed?

  # Compare the prefix itself. Negating the slice first
  # (`!token[0..2] == "v2/"`) compares a boolean with a string and can
  # never be true, which would let non-v2 tokens through this check.
  if !runtime_token.start_with?("v2/")
    errors.add :runtime_token, "not a v2 token"
    return
  end
  if ApiClientAuthorization.validate(token: runtime_token).nil?
    errors.add :runtime_token, "failed validation"
  end
end
576 if self.state == Final
577 self.secret_mounts = {}
578 self.runtime_token = nil
583 return unless saved_change_to_state? || saved_change_to_priority? || saved_change_to_container_uuid?
584 update_priorities container_uuid_before_last_save if !container_uuid_before_last_save.nil? and container_uuid_before_last_save != self.container_uuid
585 update_priorities self.container_uuid if self.container_uuid
# Registered as a before_create callback. Stores the result of
# get_requesting_container_uuid in requesting_container_uuid and, when
# there is one, sets this request's priority to the highest priority
# among the container requests whose container_uuid is that container
# (0 when there is none).
def set_requesting_container_uuid
  parent_uuid = get_requesting_container_uuid()
  self.requesting_container_uuid = parent_uuid
  return unless parent_uuid

  parent_requests = ContainerRequest.where(container_uuid: self.requesting_container_uuid)
  self.priority = parent_requests.maximum("priority") || 0
end
# Returns requesting_container_uuid when it is already set; otherwise
# the uuid of Container.for_current_token, or that call's own falsy
# result when it finds no container.
def get_requesting_container_uuid
  explicit_uuid = self.requesting_container_uuid
  return explicit_uuid if explicit_uuid

  Container.for_current_token.andand.uuid
end