18562: Rename config to AlwaysUsePreemptibleInstances.
[arvados.git] / services / api / app / models / container_request.rb
index 921d4bee60f7f5f679b0531d61f259f15b4ff96c..440c3ff3551c4ace359675415cc9d144b09af807 100644 (file)
@@ -19,28 +19,34 @@ class ContainerRequest < ArvadosModel
                primary_key: :uuid,
              }
 
-  serialize :properties, Hash
+  # PostgreSQL JSONB columns should NOT be declared as serialized; Rails 5
+  # already knows how to treat them properly.
+  attribute :properties, :jsonbHash, default: {}
+  attribute :secret_mounts, :jsonbHash, default: {}
+  attribute :output_storage_classes, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
+
   serialize :environment, Hash
   serialize :mounts, Hash
   serialize :runtime_constraints, Hash
   serialize :command, Array
   serialize :scheduling_parameters, Hash
-  serialize :secret_mounts, Hash
 
+  after_find :fill_container_defaults_after_find
   before_validation :fill_field_defaults, :if => :new_record?
-  before_validation :validate_runtime_constraints
-  before_validation :set_default_preemptible_scheduling_parameter
-  before_validation :set_container
+  before_validation :fill_container_defaults
   validates :command, :container_image, :output_path, :cwd, :presence => true
   validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
   validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 1000 }
   validate :validate_datatypes
+  validate :validate_runtime_constraints
   validate :validate_scheduling_parameters
   validate :validate_state_change
   validate :check_update_whitelist
   validate :secret_mounts_key_conflict
   validate :validate_runtime_token
-  before_save :scrub_secrets
+  after_validation :scrub_secrets
+  after_validation :set_preemptible
+  after_validation :set_container
   before_create :set_requesting_container_uuid
   before_destroy :set_priority_zero
   after_save :update_priority
@@ -71,6 +77,7 @@ class ContainerRequest < ArvadosModel
     t.add :scheduling_parameters
     t.add :state
     t.add :use_existing
+    t.add :output_storage_classes
   end
 
   # Supported states for a container request
@@ -92,7 +99,14 @@ class ContainerRequest < ArvadosModel
   :container_image, :cwd, :environment, :filters, :mounts,
   :output_path, :priority, :runtime_token,
   :runtime_constraints, :state, :container_uuid, :use_existing,
-  :scheduling_parameters, :secret_mounts, :output_name, :output_ttl]
+  :scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
+  :output_storage_classes]
+
+  def self.any_preemptible_instances?
+    Rails.configuration.InstanceTypes.any? do |k, v|
+      v["Preemptible"]
+    end
+  end
 
   def self.limit_index_columns_read
     ["mounts"]
@@ -116,20 +130,69 @@ class ContainerRequest < ArvadosModel
   end
 
   def finalize_if_needed
-    if state == Committed && Container.find_by_uuid(container_uuid).final?
-      reload
-      act_as_system_user do
-        leave_modified_by_user_alone do
-          finalize!
+    return if state != Committed
+    while true
+      # get container lock first, then lock current container request
+      # (same order as Container#handle_completed). Locking always
+      # reloads the Container and ContainerRequest records.
+      c = Container.find_by_uuid(container_uuid)
+      c.lock! if !c.nil?
+      self.lock!
+
+      if !c.nil? && container_uuid != c.uuid
+        # After locking, we've noticed a race: the container_uuid is
+        # different from the container record we just loaded.  This
+        # can happen if Container#handle_completed scheduled a new
+        # container for retry and set container_uuid while we were
+        # waiting on the container lock.  Restart the loop and get the
+        # new container.
+        redo
+      end
+
+      if !c.nil?
+        if state == Committed && c.final?
+          # The current container is final, so finalize this request.
+          act_as_system_user do
+            leave_modified_by_user_alone do
+              finalize!
+            end
+          end
         end
+      elsif state == Committed
+        # Behave as if the container is cancelled
+        update_attributes!(state: Final)
       end
+      return true
     end
   end
 
   # Finalize the container request after the container has
   # finished/cancelled.
   def finalize!
-    update_collections(container: Container.find_by_uuid(container_uuid))
+    container = Container.find_by_uuid(container_uuid)
+    if !container.nil?
+      update_collections(container: container)
+
+      if container.state == Container::Complete
+        log_col = Collection.where(portable_data_hash: container.log).first
+        if log_col
+          # Need to save collection
+          completed_coll = Collection.new(
+            owner_uuid: self.owner_uuid,
+            name: "Container log for container #{container_uuid}",
+            properties: {
+              'type' => 'log',
+              'container_request' => self.uuid,
+              'container_uuid' => container_uuid,
+            },
+            portable_data_hash: log_col.portable_data_hash,
+            manifest_text: log_col.manifest_text,
+            storage_classes_desired: self.output_storage_classes
+          )
+          completed_coll.save_with_unique_name!
+        end
+      end
+    end
     update_attributes!(state: Final)
   end
 
@@ -137,17 +200,20 @@ class ContainerRequest < ArvadosModel
     collections.each do |out_type|
       pdh = container.send(out_type)
       next if pdh.nil?
+      c = Collection.where(portable_data_hash: pdh).first
+      next if c.nil?
+      manifest = c.manifest_text
+
       coll_name = "Container #{out_type} for request #{uuid}"
       trash_at = nil
       if out_type == 'output'
-        if self.output_name
+        if self.output_name and self.output_name != ""
           coll_name = self.output_name
         end
         if self.output_ttl > 0
           trash_at = db_current_time + self.output_ttl
         end
       end
-      manifest = Collection.where(portable_data_hash: pdh).first.manifest_text
 
       coll_uuid = self.send(out_type + '_uuid')
       coll = coll_uuid.nil? ? nil : Collection.where(uuid: coll_uuid).first
@@ -156,6 +222,7 @@ class ContainerRequest < ArvadosModel
           owner_uuid: self.owner_uuid,
           name: coll_name,
           manifest_text: "",
+          storage_classes_desired: self.output_storage_classes,
           properties: {
             'type' => out_type,
             'container_request' => uuid,
@@ -163,6 +230,7 @@ class ContainerRequest < ArvadosModel
       end
 
       if out_type == "log"
+        # Copy the log into a merged collection
         src = Arv::Collection.new(manifest)
         dst = Arv::Collection.new(coll.manifest_text)
         dst.cp_r("./", ".", src)
@@ -181,7 +249,7 @@ class ContainerRequest < ArvadosModel
   end
 
   def self.full_text_searchable_columns
-    super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token"]
+    super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token", "output_storage_classes"]
   end
 
   protected
@@ -191,8 +259,9 @@ class ContainerRequest < ArvadosModel
     self.environment ||= {}
     self.runtime_constraints ||= {}
     self.mounts ||= {}
+    self.secret_mounts ||= {}
     self.cwd ||= "."
-    self.container_count_max ||= Rails.configuration.container_count_max
+    self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts
     self.scheduling_parameters ||= {}
     self.output_ttl ||= 0
     self.priority ||= 0
@@ -205,67 +274,73 @@ class ContainerRequest < ArvadosModel
       errors.add :container_uuid, "can only be updated to nil."
       return false
     end
+    if self.container_count_changed?
+      errors.add :container_count, "cannot be updated directly."
+      return false
+    end
     if state_changed? and state == Committed and container_uuid.nil?
-      self.container_uuid = Container.resolve(self).uuid
+      while true
+        c = Container.resolve(self)
+        c.lock!
+        if c.state == Container::Cancelled
+          # Lost a race: we have a lock on the container, but the
+          # container was cancelled in a different request. Restart
+          # the loop and resolve the request to a new container.
+          redo
+        end
+        self.container_uuid = c.uuid
+        break
+      end
     end
     if self.container_uuid != self.container_uuid_was
-      if self.container_count_changed?
-        errors.add :container_count, "cannot be updated directly."
-        return false
-      else
-        self.container_count += 1
-        if self.container_uuid_was
-          old_container = Container.find_by_uuid(self.container_uuid_was)
-          old_logs = Collection.where(portable_data_hash: old_container.log).first
-          if old_logs
-            log_coll = self.log_uuid.nil? ? nil : Collection.where(uuid: self.log_uuid).first
-            if self.log_uuid.nil?
-              log_coll = Collection.new(
-                owner_uuid: self.owner_uuid,
-                name: coll_name = "Container log for request #{uuid}",
-                manifest_text: "")
-            end
+      self.container_count += 1
+      return if self.container_uuid_was.nil?
 
-            # copy logs from old container into CR's log collection
-            src = Arv::Collection.new(old_logs.manifest_text)
-            dst = Arv::Collection.new(log_coll.manifest_text)
-            dst.cp_r("./", "log for container #{old_container.uuid}", src)
-            manifest = dst.manifest_text
-
-            log_coll.assign_attributes(
-              portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
-              manifest_text: manifest)
-            log_coll.save_with_unique_name!
-            self.log_uuid = log_coll.uuid
-          end
-        end
+      old_container = Container.find_by_uuid(self.container_uuid_was)
+      return if old_container.nil?
+
+      old_logs = Collection.where(portable_data_hash: old_container.log).first
+      return if old_logs.nil?
+
+      log_coll = self.log_uuid.nil? ? nil : Collection.where(uuid: self.log_uuid).first
+      if self.log_uuid.nil?
+        log_coll = Collection.new(
+          owner_uuid: self.owner_uuid,
+          name: coll_name = "Container log for request #{uuid}",
+          manifest_text: "",
+          storage_classes_desired: self.output_storage_classes)
       end
+
+      # copy logs from old container into CR's log collection
+      src = Arv::Collection.new(old_logs.manifest_text)
+      dst = Arv::Collection.new(log_coll.manifest_text)
+      dst.cp_r("./", "log for container #{old_container.uuid}", src)
+      manifest = dst.manifest_text
+
+      log_coll.assign_attributes(
+        portable_data_hash: Digest::MD5.hexdigest(manifest) + '+' + manifest.bytesize.to_s,
+        manifest_text: manifest)
+      log_coll.save_with_unique_name!
+      self.log_uuid = log_coll.uuid
     end
   end
 
-  def set_default_preemptible_scheduling_parameter
-    c = get_requesting_container()
-    if self.state == Committed
-      # If preemptible instances (eg: AWS Spot Instances) are allowed,
-      # ask them on child containers by default.
-      if Rails.configuration.preemptible_instances and !c.nil? and
-        self.scheduling_parameters['preemptible'].nil?
-          self.scheduling_parameters['preemptible'] = true
-      end
+  def set_preemptible
+    if (new_record? || state_changed?) &&
+       state == Committed &&
+       Rails.configuration.Containers.AlwaysUsePreemptibleInstances &&
+       get_requesting_container_uuid() &&
+       self.class.any_preemptible_instances?
+      self.scheduling_parameters['preemptible'] = true
     end
   end
 
   def validate_runtime_constraints
     case self.state
     when Committed
-      [['vcpus', true],
-       ['ram', true],
-       ['keep_cache_ram', false]].each do |k, required|
-        if !required && !runtime_constraints.include?(k)
-          next
-        end
+      ['vcpus', 'ram'].each do |k|
         v = runtime_constraints[k]
-        unless (v.is_a?(Integer) && v > 0)
+        if !v.is_a?(Integer) || v <= 0
           errors.add(:runtime_constraints,
                      "[#{k}]=#{v.inspect} must be a positive integer")
         end
@@ -318,8 +393,10 @@ class ContainerRequest < ArvadosModel
             scheduling_parameters['partitions'].size)
             errors.add :scheduling_parameters, "partitions must be an array of strings"
       end
-      if !Rails.configuration.preemptible_instances and scheduling_parameters['preemptible']
-        errors.add :scheduling_parameters, "preemptible instances are not allowed"
+      if scheduling_parameters['preemptible'] &&
+         (new_record? || state_changed?) &&
+         !self.class.any_preemptible_instances?
+        errors.add :scheduling_parameters, "preemptible instances are not configured in InstanceTypes"
       end
       if scheduling_parameters.include? 'max_run_time' and
         (!scheduling_parameters['max_run_time'].is_a?(Integer) ||
@@ -335,26 +412,33 @@ class ContainerRequest < ArvadosModel
     if self.new_record? || self.state_was == Uncommitted
       # Allow create-and-commit in a single operation.
       permitted.push(*AttrsPermittedBeforeCommit)
+    elsif mounts_changed? && mounts_was.keys.sort == mounts.keys.sort
+      # Ignore the updated mounts if the only changes are default/zero
+      # values as added by controller, see 17774
+      only_defaults = true
+      mounts.each do |path, mount|
+        (mount.to_a - mounts_was[path].to_a).each do |k, v|
+          if ![0, "", false, nil].index(v)
+            only_defaults = false
+          end
+        end
+      end
+      if only_defaults
+        clear_attribute_change("mounts")
+      end
     end
 
     case self.state
     when Committed
       permitted.push :priority, :container_count_max, :container_uuid
 
-      if self.container_uuid.nil?
-        self.errors.add :container_uuid, "has not been resolved to a container."
-      end
-
       if self.priority.nil?
         self.errors.add :priority, "cannot be nil"
       end
 
-      # Allow container count to increment by 1
-      if (self.container_uuid &&
-          self.container_uuid != self.container_uuid_was &&
-          self.container_count == 1 + (self.container_count_was || 0))
-        permitted.push :container_count
-      end
+      # Allow container count to increment (not by client, only by us
+      # -- see set_container)
+      permitted.push :container_count
 
       if current_user.andand.is_admin
         permitted.push :log_uuid
@@ -404,10 +488,10 @@ class ContainerRequest < ArvadosModel
   end
 
   def update_priority
-    return unless state_changed? || priority_changed? || container_uuid_changed?
+    return unless saved_change_to_state? || saved_change_to_priority? || saved_change_to_container_uuid?
     act_as_system_user do
       Container.
-        where('uuid in (?)', [self.container_uuid_was, self.container_uuid].compact).
+        where('uuid in (?)', [container_uuid_before_last_save, self.container_uuid].compact).
         map(&:update_priority!)
     end
   end
@@ -417,17 +501,14 @@ class ContainerRequest < ArvadosModel
   end
 
   def set_requesting_container_uuid
-    c = get_requesting_container()
-    if !c.nil?
-      self.requesting_container_uuid = c.uuid
+    if (self.requesting_container_uuid = get_requesting_container_uuid())
       # Determine the priority of container request for the requesting
       # container.
       self.priority = ContainerRequest.where(container_uuid: self.requesting_container_uuid).maximum("priority") || 0
     end
   end
 
-  def get_requesting_container
-    return self.requesting_container_uuid if !self.requesting_container_uuid.nil?
-    Container.for_current_token
+  def get_requesting_container_uuid
+    return self.requesting_container_uuid || Container.for_current_token.andand.uuid
   end
 end