X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/2b8f39b5067103d60f7bc43d26cb2f59126b2a42..6c78b28f9f54664babc57a4b4372c502065ed5d1:/services/api/app/models/container_request.rb diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb index 0c2ad09655..c412e4b850 100644 --- a/services/api/app/models/container_request.rb +++ b/services/api/app/models/container_request.rb @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 require 'whitelist_update' +require 'arvados/collection' class ContainerRequest < ArvadosModel include ArvadosModelUpdates @@ -18,13 +19,16 @@ class ContainerRequest < ArvadosModel primary_key: :uuid, } - serialize :properties, Hash + # Posgresql JSONB columns should NOT be declared as serialized, Rails 5 + # already know how to properly treat them. + attribute :properties, :jsonbHash, default: {} + attribute :secret_mounts, :jsonbHash, default: {} + serialize :environment, Hash serialize :mounts, Hash serialize :runtime_constraints, Hash serialize :command, Array serialize :scheduling_parameters, Hash - serialize :secret_mounts, Hash before_validation :fill_field_defaults, :if => :new_record? before_validation :validate_runtime_constraints @@ -115,13 +119,34 @@ class ContainerRequest < ArvadosModel end def finalize_if_needed - if state == Committed && Container.find_by_uuid(container_uuid).final? - reload - act_as_system_user do - leave_modified_by_user_alone do - finalize! + return if state != Committed + while true + # get container lock first, then lock current container request + # (same order as Container#handle_completed). Locking always + # reloads the Container and ContainerRequest records. + c = Container.find_by_uuid(container_uuid) + c.lock! + self.lock! + + if container_uuid != c.uuid + # After locking, we've noticed a race, the container_uuid is + # different than the container record we just loaded. This + # can happen if Container#handle_completed scheduled a new + # container for retry and set container_uuid while we were + # waiting on the container lock. Restart the loop and get the + # new container. + redo + end + + if state == Committed && c.final? + # The current container is + act_as_system_user do + leave_modified_by_user_alone do + finalize! + end end end + return true end end @@ -154,13 +179,23 @@ class ContainerRequest < ArvadosModel coll = Collection.new( owner_uuid: self.owner_uuid, name: coll_name, + manifest_text: "", properties: { 'type' => out_type, 'container_request' => uuid, }) end + + if out_type == "log" + src = Arv::Collection.new(manifest) + dst = Arv::Collection.new(coll.manifest_text) + dst.cp_r("./", ".", src) + dst.cp_r("./", "log for container #{container.uuid}", src) + manifest = dst.manifest_text + end + coll.assign_attributes( - portable_data_hash: pdh, + portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s, manifest_text: manifest, trash_at: trash_at, delete_at: trash_at) @@ -180,8 +215,9 @@ class ContainerRequest < ArvadosModel self.environment ||= {} self.runtime_constraints ||= {} self.mounts ||= {} + self.secret_mounts ||= {} self.cwd ||= "." - self.container_count_max ||= Rails.configuration.container_count_max + self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts self.scheduling_parameters ||= {} self.output_ttl ||= 0 self.priority ||= 0 @@ -195,7 +231,18 @@ class ContainerRequest < ArvadosModel return false end if state_changed? and state == Committed and container_uuid.nil? - self.container_uuid = Container.resolve(self).uuid + while true + c = Container.resolve(self) + c.lock! + if c.state == Container::Cancelled + # Lost a race, we have a lock on the container but the + # container was cancelled in a different request, restart + # the loop and resolve request to a new container. + redo + end + self.container_uuid = c.uuid + break + end end if self.container_uuid != self.container_uuid_was if self.container_count_changed? @@ -203,6 +250,31 @@ class ContainerRequest < ArvadosModel return false else self.container_count += 1 + if self.container_uuid_was + old_container = Container.find_by_uuid(self.container_uuid_was) + old_logs = Collection.where(portable_data_hash: old_container.log).first + if old_logs + log_coll = self.log_uuid.nil? ? nil : Collection.where(uuid: self.log_uuid).first + if self.log_uuid.nil? + log_coll = Collection.new( + owner_uuid: self.owner_uuid, + name: coll_name = "Container log for request #{uuid}", + manifest_text: "") + end + + # copy logs from old container into CR's log collection + src = Arv::Collection.new(old_logs.manifest_text) + dst = Arv::Collection.new(log_coll.manifest_text) + dst.cp_r("./", "log for container #{old_container.uuid}", src) + manifest = dst.manifest_text + + log_coll.assign_attributes( + portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s, + manifest_text: manifest) + log_coll.save_with_unique_name! + self.log_uuid = log_coll.uuid + end + end end end end @@ -212,7 +284,7 @@ class ContainerRequest < ArvadosModel if self.state == Committed # If preemptible instances (eg: AWS Spot Instances) are allowed, # ask them on child containers by default. - if Rails.configuration.preemptible_instances and !c.nil? and + if Rails.configuration.Containers.UsePreemptibleInstances and !c.nil? and self.scheduling_parameters['preemptible'].nil? self.scheduling_parameters['preemptible'] = true end @@ -282,7 +354,7 @@ class ContainerRequest < ArvadosModel scheduling_parameters['partitions'].size) errors.add :scheduling_parameters, "partitions must be an array of strings" end - if !Rails.configuration.preemptible_instances and scheduling_parameters['preemptible'] + if !Rails.configuration.Containers.UsePreemptibleInstances and scheduling_parameters['preemptible'] errors.add :scheduling_parameters, "preemptible instances are not allowed" end if scheduling_parameters.include? 'max_run_time' and