+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
module Keep
class Locator
# A Locator is used to parse and manipulate Keep locator strings.
# sign-timestamp ::= <8 lowercase hex digits>
attr_reader :hash, :hints, :size
- LOCATOR_REGEXP = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?(\+([[:upper:]][[:alnum:]+@_-]*))?$/
+ # Grammar for a complete Keep locator string: 32 hex digits, an optional
+ # "+<decimal size>", then zero or more "+<hint>" fields, each hint
+ # starting with an uppercase letter.
+ #
+ # Anchored with \A and \z rather than ^ and $: in Ruby ^ and $ match at
+ # every line boundary, so with ^ a multi-line string whose last line is
+ # a locator would be accepted.
+ #
+ # Capture groups: 1 = hash, 3 = size digits, 4 = the whole hint trailer
+ # (including leading "+"), 6 = the last hint only (a repeated group
+ # keeps just its final iteration).
+ LOCATOR_REGEXP = /\A([[:xdigit:]]{32})(\+([[:digit:]]+))?((\+([[:upper:]][[:alnum:]@_-]*))+)?\z/
def initialize(hasharg, sizearg, hintarg)
@hash = hasharg
# Lenient counterpart of parse!: hands tok to Locator.parse! and returns
# whatever that returns, or nil when parse! raises ArgumentError.
def self.parse(tok)
begin
Locator.parse!(tok)
- rescue ArgumentError => e
+ rescue ArgumentError
nil
end
end
raise ArgumentError.new "locator is nil or empty"
end
- m = LOCATOR_REGEXP.match(tok.strip)
+ m = LOCATOR_REGEXP.match(tok)
unless m
raise ArgumentError.new "not a valid locator #{tok}"
end
- tokhash, _, toksize, _, trailer = m[1..5]
+ tokhash, _, toksize, _, _, trailer = m[1..6]
tokhints = []
if trailer
trailer.split('+').each do |hint|
- if hint =~ /^[[:upper:]][[:alnum:]@_-]+$/
+ if hint =~ /^[[:upper:]][[:alnum:]@_-]*$/
tokhints.push(hint)
else
- raise ArgumentError.new "unknown hint #{hint}"
+ raise ArgumentError.new "invalid hint #{hint}"
end
end
end
end
class Manifest
+ # Stream name token: "." optionally followed by "/"-separated path
+ # components that contain neither "/" nor whitespace.
+ # \A and \z (instead of ^ and $, which match at any line boundary in
+ # Ruby) make the pattern cover the entire string.
+ STRICT_STREAM_TOKEN_REGEXP = /\A(\.)(\/[^\/\s]+)*\z/
+ # File token: "<digits>:<digits>:<path>", where <path> is one or more
+ # "/"-separated components without whitespace. Group 1 is the path.
+ STRICT_FILE_TOKEN_REGEXP = /\A[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)\z/
+
# Class to parse a manifest text and provide common views of that data.
def initialize(manifest_text)
@text = manifest_text
stream_name = nil
block_tokens = []
file_tokens = []
- line.scan /\S+/ do |token|
+ line.scan(/\S+/) do |token|
if stream_name.nil?
stream_name = unescape token
elsif file_tokens.empty? and Locator.valid? token
@text.each_line do |line|
stream_name = nil
in_file_tokens = false
- line.scan /\S+/ do |token|
+ line.scan(/\S+/) do |token|
if stream_name.nil?
stream_name = unescape token
elsif in_file_tokens or not Locator.valid? token
in_file_tokens = true
- yield [stream_name] + split_file_token(token)
+
+ file_tokens = split_file_token(token)
+ stream_name_adjuster = ''
+ if file_tokens[2].include?('/') # '/' in filename
+ parts = file_tokens[2].rpartition('/')
+ stream_name_adjuster = parts[1] + parts[0] # /dir_parts
+ file_tokens[2] = parts[2]
+ end
+
+ yield [stream_name + stream_name_adjuster] + file_tokens
end
end
end
seen_files.size
end
+ def files_size
+ # Return the total size of all files in this manifest.
+ files.reduce(0) { |total, (_, _, size)| total + size }
+ end
+
# True when files_count(want_count + 1) equals want_count.
# NOTE(review): presumably files_count stops counting once it reaches the
# limit it is given, so asking for one more than wanted distinguishes
# "exactly want_count" from "at least want_count" -- confirm against
# files_count.
def exact_file_count?(want_count)
files_count(want_count + 1) == want_count
end
end
false
end
+
+ # Verify that a given manifest is valid according to
+ # https://arvados.org/projects/arvados/wiki/Keep_manifest_format
+ def self.validate! manifest
+ raise ArgumentError.new "No manifest found" if !manifest
+
+ return true if manifest.empty?
+
+ raise ArgumentError.new "Invalid manifest: does not end with newline" if !manifest.end_with?("\n")
+ line_count = 0
+ manifest.each_line do |line|
+ line_count += 1
+
+ words = line[0..-2].split(/ /)
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing stream name" if words.empty?
+
+ count = 0
+
+ word = words.shift
+ count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
+
+ count = 0
+ word = words.shift
+ while word =~ Locator::LOCATOR_REGEXP
+ word = words.shift
+ count += 1
+ end
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
+
+ count = 0
+ while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+ word = words.shift
+ count += 1
+ end
+
+ if word
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: invalid file token #{word.inspect}"
+ elsif count == 0
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: no file tokens"
+ end
+
+ # Ruby's split() method silently drops trailing empty tokens
+ # (which are not allowed by the manifest format) so we have to
+ # check trailing spaces manually.
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: trailing space" if line.end_with? " \n"
+ end
+ true
+ end
+
+ def self.valid? manifest
+ begin
+ validate! manifest
+ true
+ rescue ArgumentError
+ false
+ end
+ end
end
end