# sign-timestamp ::= <8 lowercase hex digits>
# Read-only accessors for the three values stored by initialize.
# NOTE(review): the :hash reader replaces the inherited Object#hash on
# instances of this class -- confirm that is intended.
attr_reader :hash, :hints, :size
# Matches a complete locator string: 32 hex digits, an optional "+<size>"
# made of decimal digits, then zero or more "+<hint>" fields, each of
# which starts with an uppercase letter.
#
# Anchored with \A and \z so the whole string has to be a locator.  A
# leading "^" only anchors to the start of a line, which would accept a
# locator sitting on the last line of a multi-line string.
#
# Captures: 1 = hash, 3 = size digits, 4 = all hints (with leading "+").
LOCATOR_REGEXP = /\A([[:xdigit:]]{32})(\+([[:digit:]]+))?((\+([[:upper:]][[:alnum:]@_-]*))+)?\z/
+
# Builds a locator from its already-separated parts.  The arguments are
# stored as given (no validation or conversion happens here) in @hash,
# @size and @hints respectively.
def initialize(hasharg, sizearg, hintarg)
  @hash, @size, @hints = hasharg, sizearg, hintarg
end
# Reports whether tok matches LOCATOR_REGEXP.
# The double negation turns the MatchData-or-nil result of the match into
# a strict true/false.
+ def self.valid? tok
+ !!(LOCATOR_REGEXP.match tok)
+ end
+
# Locator.parse returns a Locator object parsed from the string tok.
# Returns nil if tok could not be parsed as a valid locator.
def self.parse(tok)
raise ArgumentError.new "locator is nil or empty"
end
- m = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?(\+([[:upper:]][[:alnum:]+@_-]*))?$/.match(tok.strip)
+ m = LOCATOR_REGEXP.match(tok.strip)
unless m
raise ArgumentError.new "not a valid locator #{tok}"
end
end
class Manifest
# Stream name token: a single "." optionally followed by one or more
# "/"-introduced path segments, not ending in "/".
# Anchored with \A and \z so the entire token must be the stream name.
# Without a start anchor, tokens such as "foo.", ".." or "a/./b" match
# because the pattern can begin at any "." in the string.
STREAM_REGEXP = /\A(\.)((\/+.*[^\/])*)\z/
# File token: "<digits>:<digits>:<name>" where the name neither starts
# nor ends with "/".  Anchored with \A and \z rather than the per-line
# "^" and "$" so a multi-line string cannot match on one of its lines.
FILE_REGEXP = /\A[[:digit:]]+:[[:digit:]]+:(?!\/).*[^\/]\z/
+
# Class to parse a manifest text and provide common views of that data.
def initialize(manifest_text)
@text = manifest_text
def each_line
return to_enum(__method__) unless block_given?
@text.each_line do |line|
- tokens = line.split
- stream_name = unescape(tokens.shift)
- blocks = []
- while loc = Locator.parse(tokens.first)
- blocks << loc
- tokens.shift
+ stream_name = nil
+ block_tokens = []
+ file_tokens = []
+ line.scan /\S+/ do |token|
+ if stream_name.nil?
+ stream_name = unescape token
+ elsif file_tokens.empty? and Locator.valid? token
+ block_tokens << token
+ else
+ file_tokens << unescape(token)
+ end
end
- yield [stream_name, blocks, tokens.map { |s| unescape(s) }]
+ # Ignore blank lines
+ next if stream_name.nil?
+ yield [stream_name, block_tokens, file_tokens]
end
end
end
end
- def each_file_spec(speclist)
- return to_enum(__method__, speclist) unless block_given?
- speclist.each do |filespec|
- start_pos, filesize, filename = filespec.split(':', 3)
- yield [start_pos.to_i, filesize.to_i, filename]
# Splits a file token of the form "start:size:name" into its three parts.
# Returns [start_pos, filesize, filename] with the first two converted by
# to_i and the name passed through unescape.
# Raises ArgumentError when the token has fewer than three ':'-separated
# fields.
+ def split_file_token token
# The limit of 3 keeps any further ':' characters inside the file name.
+ start_pos, filesize, filename = token.split(':', 3)
+ if filename.nil?
+ raise ArgumentError.new "Invalid file token '#{token}'"
+ end
# to_i never raises: text without leading digits becomes 0.
+ [start_pos.to_i, filesize.to_i, unescape(filename)]
+ end
+
# Walks every line of @text and yields one
# [stream_name, start_pos, file_size, file_name] array per file token.
# When a file name contains '/', its directory part is appended to the
# yielded stream name and only the final path component is kept as the
# file name.
# Without a block, returns an Enumerator over the same values; with a
# block, returns true.
+ def each_file_spec
+ return to_enum(__method__) unless block_given?
+ @text.each_line do |line|
+ stream_name = nil
+ in_file_tokens = false
+ line.scan /\S+/ do |token|
# The first token on a line is the stream name.
+ if stream_name.nil?
+ stream_name = unescape token
# Tokens that look like locators are skipped until the first token that
# does not; from then on every token is treated as a file token.
+ elsif in_file_tokens or not Locator.valid? token
+ in_file_tokens = true
+
+ file_tokens = split_file_token(token)
+ stream_name_adjuster = ''
+ if file_tokens[2].include?('/') # '/' in filename
# rpartition splits at the last '/': [directories, '/', basename].
+ parts = file_tokens[2].rpartition('/')
+ stream_name_adjuster = parts[1] + parts[0] # /dir_parts
+ file_tokens[2] = parts[2]
+ end
+
+ yield [stream_name + stream_name_adjuster] + file_tokens
+ end
+ end
end
+ true
end
def files
if @files.nil?
file_sizes = Hash.new(0)
- each_line do |streamname, blocklist, filelist|
- each_file_spec(filelist) do |_, filesize, filename|
- file_sizes[[streamname, filename]] += filesize
- end
+ each_file_spec do |streamname, _, filesize, filename|
+ file_sizes[[streamname, filename]] += filesize
end
@files = file_sizes.each_pair.map do |(streamname, filename), size|
[streamname, filename, size]
return files.size
end
seen_files = {}
- each_line do |streamname, blocklist, filelist|
- each_file_spec(filelist) do |_, _, filename|
- seen_files[[streamname, filename]] = true
- return stop_after if (seen_files.size >= stop_after)
- end
+ each_file_spec do |streamname, _, _, filename|
+ seen_files[[streamname, filename]] = true
+ return stop_after if (seen_files.size >= stop_after)
end
seen_files.size
end
# Adds up the third element (the size) of every entry returned by files,
# starting from 0.
+ def files_size
+ # Return the total size of all files in this manifest.
+ files.reduce(0) { |total, (_, _, size)| total + size }
+ end
+
# Reports whether files_count, asked with a limit of want_count + 1,
# comes back equal to want_count.
# NOTE(review): presumably files_count stops counting at its argument, so
# the extra 1 is what lets a larger manifest be told apart -- confirm
# against files_count.
def exact_file_count?(want_count)
  counted = files_count(want_count + 1)
  counted == want_count
end
if want_file.nil?
want_stream, want_file = File.split(want_stream)
end
- each_line do |stream_name, _, filelist|
- if (stream_name == want_stream) and
- each_file_spec(filelist).any? { |_, _, name| name == want_file }
+ each_file_spec do |streamname, _, _, name|
+ if streamname == want_stream and name == want_file
return true
end
end
false
end
+
# Verify that a given manifest is valid as per the manifest format definition.
# Valid format: stream name + one or more locators + one or more files for each stream in manifest.
# https://arvados.org/projects/arvados/wiki/Keep_manifest_format
#
# Every line is split on single whitespace characters and checked in
# order: a stream name (STREAM_REGEXP, no "//"), at least one locator
# (Locator::LOCATOR_REGEXP), then at least one file token (FILE_REGEXP,
# no "//"), with no tokens left over.
#
# Returns true when every line passes.
# Raises ArgumentError when manifest is nil, does not end with a newline,
# or contains an invalid line; the message names the 1-based line
# ("stream") number and, where there is one, the offending token.
def self.valid?(manifest)
  raise ArgumentError.new "No manifest found" if manifest.nil?
  raise ArgumentError.new "Invalid manifest: does not end with new line" if !manifest.end_with?("\n")

  manifest.each_line.with_index(1) do |line, line_count|
    prefix = "Manifest invalid for stream #{line_count}:"

    words = line.split(/[[:space:]]/)
    raise ArgumentError.new "#{prefix} missing stream name" if words.empty?

    word = words.shift
    unless word =~ STREAM_REGEXP && !word.include?('//')
      raise ArgumentError.new "#{prefix} missing or invalid stream name #{word.inspect if word}"
    end

    # Consume the run of locators; word ends up as the first non-locator
    # token, or nil when the line is exhausted.
    locator_count = 0
    word = words.shift
    while word =~ Locator::LOCATOR_REGEXP
      locator_count += 1
      word = words.shift
    end
    if locator_count == 0
      raise ArgumentError.new "#{prefix} missing or invalid locator #{word.inspect if word}"
    end

    # Consume the run of file tokens in the same way.
    file_count = 0
    while word =~ FILE_REGEXP && !word.include?('//')
      file_count += 1
      word = words.shift
    end

    # Anything still in word here is a token that is neither a locator
    # nor a valid file token.
    if word
      raise ArgumentError.new "#{prefix} invalid file token #{word.inspect}"
    elsif file_count == 0
      raise ArgumentError.new "#{prefix} no file tokens"
    end
  end

  # each_line returns the manifest itself; give the predicate a real boolean.
  true
end
end
end