14484: Adds functionality and test for pdh grouping in the container model
[arvados.git] / services / api / app / models / container.rb
index 6ef9d7cbd338661a687d6a529574d09f8f4f7bc5..b3328d9c76942949f8153449ec1368e4eb8418ba 100644 (file)
@@ -224,26 +224,10 @@ class Container < ArvadosModel
         next
       end
 
-      pdh = mount['portable_data_hash']
-      uuid = mount['uuid']
+      uuid = mount.delete 'uuid'
 
-      # Prioritize PDH over UUID for mount selection
-      if not pdh.nil?
-        c = Collection.
-          readable_by(current_user).
-          where(portable_data_hash: pdh)
-        if c.count > 0
-          if uuid && c.where(uuid: uuid).count == 0
-            raise ArgumentError.new "cannot mount collection #{uuid.inspect}: current portable_data_hash #{pdh.inspect} does not match #{c.first.portable_data_hash.inspect} in request"
-          end
-          # mount.delete 'uuid'
-          next
-        end
-        raise ArgumentError.new "cannot mount collection #{pdh.inspect}: not found"
-      end
-
-      # PDH not supplied, try by UUID
-      if uuid
+      if mount['portable_data_hash'].nil? and !uuid.nil?
+        # PDH not supplied, try by UUID
         c = Collection.
           readable_by(current_user).
           where(uuid: uuid).
@@ -295,14 +279,6 @@ class Container < ArvadosModel
     candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
     log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
 
-    candidates = candidates.where('runtime_user_uuid = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  attrs[:runtime_user_uuid])
-    log_reuse_info(candidates) { "after filtering on runtime_user_uuid #{attrs[:runtime_user_uuid].inspect}" }
-
-    candidates = candidates.where('runtime_auth_scopes = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  SafeJSON.dump(attrs[:runtime_auth_scopes].sort))
-    log_reuse_info(candidates) { "after filtering on runtime_auth_scopes #{attrs[:runtime_auth_scopes].inspect}" }
-
     log_reuse_info { "checking for state=Complete with readable output and log..." }
 
     select_readable_pdh = Collection.
@@ -370,7 +346,7 @@ class Container < ArvadosModel
     transaction do
       reload
       check_lock_fail
-      update_attributes!(state: Locked)
+      update_attributes!(state: Locked, lock_count: self.lock_count+1)
     end
   end
 
@@ -388,7 +364,14 @@ class Container < ArvadosModel
     transaction do
       reload(lock: 'FOR UPDATE')
       check_unlock_fail
-      update_attributes!(state: Queued)
+      if self.lock_count < Rails.configuration.max_container_dispatch_attempts
+        update_attributes!(state: Queued)
+      else
+        update_attributes!(state: Cancelled,
+                           runtime_status: {
+                             error: "Container exceeded 'max_container_dispatch_attempts' (lock_count=#{self.lock_count}."
+                           })
+      end
     end
   end
 
@@ -399,6 +382,9 @@ class Container < ArvadosModel
     else
       kwargs = {}
     end
+    if users_list.select { |u| u.is_admin }.any?
+      return super
+    end
     Container.where(ContainerRequest.readable_by(*users_list).where("containers.uuid = container_requests.container_uuid").exists)
   end
 
@@ -419,6 +405,67 @@ class Container < ArvadosModel
     end
   end
 
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  #
+  # Change with caution!
+  #
+  # Correctly groups pdhs to use for batch database updates. Helps avoid
+  # updating too many database rows in a single transaction.
+  def self.group_pdhs_for_multiple_transactions(log_prefix)
+    batch_size_max = 1 << 28 # 256 MiB
+    last_pdh = '0'
+    done = 0
+    any = true
+
+    total = ActiveRecord::Base.connection.exec_query(
+      'SELECT DISTINCT portable_data_hash FROM collections'
+    ).rows.count
+
+    while any
+      any = false
+      pdhs = ActiveRecord::Base.connection.exec_query(
+        'SELECT DISTINCT portable_data_hash FROM collections '\
+        "WHERE portable_data_hash > '#{last_pdh}' "\
+        'GROUP BY portable_data_hash LIMIT 1000'
+      )
+      if pdhs.rows.count.zero?
+        break
+      end
+
+      Container.group_pdhs_by_manifest_size(pdhs, batch_size_max) do |grouped_pdhs|
+        any = true
+        yield grouped_pdhs
+        done += grouped_pdhs.size
+        last_pdh = pdhs[-1]
+        Rails.logger.info(log_prefix + ": #{done}/#{total}")
+      end
+    end
+    Rails.logger.info(log_prefix + ': finished')
+  end
+
+  # NOTE: Migration 20190322174136_add_file_info_to_collection.rb relies on this function.
+  #
+  # Change with caution!
+  #
+  # Given an array of pdhs, yield a subset array of pdhs when the total
+  # size of all manifest_texts is no more than batch_size_max. Pdhs whose manifest_text 
+  # is bigger than batch_size_max are yielded by themselves
+  def self.group_pdhs_by_manifest_size(pdhs, batch_size_max)
+    batch_size = 0
+    batch_pdhs = {}
+    pdhs.each do |pdh|
+      manifest_size = pdh.split('+')[1].to_i
+      if batch_size > 0 && batch_size + manifest_size > batch_size_max
+        yield batch_pdhs.keys
+        batch_pdhs = {}
+        batch_size = 0
+      end
+      batch_pdhs[pdh] = true
+      batch_size += manifest_size
+    end
+    yield batch_pdhs.keys
+  end
+
   protected
 
   def fill_field_defaults
@@ -478,7 +525,7 @@ class Container < ArvadosModel
 
     case self.state
     when Locked
-      permitted.push :priority, :runtime_status, :log
+      permitted.push :priority, :runtime_status, :log, :lock_count
 
     when Queued
       permitted.push :priority
@@ -499,7 +546,7 @@ class Container < ArvadosModel
       when Running
         permitted.push :finished_at, *progress_attrs
       when Queued, Locked
-        permitted.push :finished_at, :log
+        permitted.push :finished_at, :log, :runtime_status
       end
 
     else