closes #4523
[arvados.git] / services / api / app / models / collection.rb
index 08c50d9480aad17e247e38d049f278cd5d29e837..8258d1f53cd5a39495b2a30493492f3a416807c7 100644 (file)
@@ -5,10 +5,12 @@ class Collection < ArvadosModel
   include KindAndEtag
   include CommonApiTemplate
 
+  before_validation :check_encoding
   before_validation :check_signatures
   before_validation :strip_manifest_text
   before_validation :set_portable_data_hash
   validate :ensure_hash_matches_manifest_text
+  before_save :set_file_names
 
   # Query only undeleted collections by default.
   default_scope where("expires_at IS NULL or expires_at > CURRENT_TIMESTAMP")
@@ -125,6 +127,54 @@ class Collection < ArvadosModel
     end
   end
 
+  def set_file_names
+    if self.manifest_text_changed?
+      self.file_names = manifest_files
+    end
+    true
+  end
+
+  def manifest_files
+    names = ''
+    if self.manifest_text
+      self.manifest_text.scan(/ \d+:\d+:(\S+)/) do |name|
+        names << name.first.gsub('\040',' ') + "\n"
+        break if names.length > 2**13
+      end
+    end
+
+    if self.manifest_text and names.length < 2**13
+      self.manifest_text.scan(/^\.\/(\S+)/m) do |stream_name|
+        names << stream_name.first.gsub('\040',' ') + "\n"
+        break if names.length > 2**13
+      end
+    end
+
+    names[0,2**13]
+  end
+
+  def check_encoding
+    if manifest_text.encoding.name == 'UTF-8' and manifest_text.valid_encoding?
+      true
+    else
+      begin
+        # If Ruby thinks the encoding is something else, like 7-bit
+        # ASCII, but its stored bytes are equal to the (valid) UTF-8
+        # encoding of the same string, we declare it to be a UTF-8
+        # string.
+        utf8 = manifest_text
+        utf8.force_encoding Encoding::UTF_8
+        if utf8.valid_encoding? and utf8 == manifest_text.encode(Encoding::UTF_8)
+          manifest_text = utf8
+          return true
+        end
+      rescue
+      end
+      errors.add :manifest_text, "must use UTF-8 encoding"
+      false
+    end
+  end
+
   def redundancy_status
     if redundancy_confirmed_as.nil?
       'unconfirmed'
@@ -246,6 +296,10 @@ class Collection < ArvadosModel
     find_all_for_docker_image(search_term, search_tag, readers).first
   end
 
+  def self.searchable_columns operator
+    super - ["manifest_text"]
+  end
+
   protected
   def portable_manifest_text
     portable_manifest = self[:manifest_text].dup