3 # A Locator is used to parse and manipulate Keep locator strings.
5 # Locators obey the following syntax:
7 # locator ::= address hint*
8 # address ::= digest size-hint
9 # digest ::= <32 hexadecimal digits>
10 # size-hint ::= "+" [0-9]+
11 # hint ::= "+" hint-type hint-content
13 # hint-content ::= [A-Za-z0-9@_-]+
15 # Individual hints may have their own required format:
17 # sign-hint ::= "+A" <40 lowercase hex digits> "@" sign-timestamp
18 # sign-timestamp ::= <8 lowercase hex digits>
19 attr_reader :hash, :hints, :size
# Regular expression matching one complete Keep locator string.
#
# The pattern is anchored with \A and \z so that the entire string must be
# a locator. A line-based ^ anchor would also accept a locator that is
# preceded by arbitrary text and a newline.
#
# Capture groups:
#   1 - the 32-hex-digit digest
#   2 - the size hint including its leading "+" (nil if absent)
#   3 - the digits of the size hint (nil if absent)
#   4 - all remaining hints, each with its leading "+" (nil if none)
#   5 - the last hint including its leading "+"
#   6 - the last hint without its leading "+"
#
# Groups 5 and 6 sit inside a repetition, so when several hints are present
# they hold only the final one; group 4 holds the complete hint list.
#
# This pattern is more permissive than the grammar documented for locators:
# the size hint is optional and a hint may consist of its type letter alone.
LOCATOR_REGEXP = /\A([[:xdigit:]]{32})(\+([[:digit:]]+))?((\+([[:upper:]][[:alnum:]@_-]*))+)?\z/
23 def initialize(hasharg, sizearg, hintarg)
30 !!(LOCATOR_REGEXP.match tok)
33 # Locator.parse returns a Locator object parsed from the string tok.
34 # Returns nil if tok could not be parsed as a valid locator.
38 rescue ArgumentError => e
43 # Locator.parse! returns a Locator object parsed from the string tok,
44 # raising an ArgumentError if tok cannot be parsed.
46 if tok.nil? or tok.empty?
47 raise ArgumentError.new "locator is nil or empty"
50 m = LOCATOR_REGEXP.match(tok)
52 raise ArgumentError.new "not a valid locator #{tok}"
55 tokhash, _, toksize, _, _, trailer = m[1..6]
58 trailer.split('+').each do |hint|
59 if hint =~ /^[[:upper:]][[:alnum:]@_-]*$/
62 raise ArgumentError.new "invalid hint #{hint}"
67 Locator.new(tokhash, toksize, tokhints)
70 # Returns the signature hint supplied with this locator,
71 # or nil if the locator was not signed.
73 @hints.grep(/^A/).first
# Returns a new Locator with the same hash and size, omitting every hint that starts with "A" (the signature hint).
78 Locator.new(@hash, @size, @hints.reject { |o| o.start_with?("A") })
82 Locator.new(@hash, @size, [])
92 [ @hash, @size, *@hints ].join('+')
94 [ @hash, *@hints ].join('+')
# Matches a complete stream name token: "." optionally followed by one or
# more "/component" parts, where a component contains neither "/" nor
# whitespace. Anchored with \A and \z so the whole string must match; the
# line anchors ^ and $ would accept any single matching line inside a
# multi-line string, and $ would tolerate a trailing newline.
STRICT_STREAM_TOKEN_REGEXP = /\A(\.)(\/[^\/\s]+)*\z/
# Matches a complete file token of the form "<digits>:<digits>:<path>",
# where <path> is one or more "/"-separated components containing neither
# "/" nor whitespace. Capture group 1 is the path. Anchored with \A and \z
# for the same reason as the stream token pattern.
STRICT_FILE_TOKEN_REGEXP = /\A[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)\z/
103 # Class to parse a manifest text and provide common views of that data.
104 def initialize(manifest_text)
105 @text = manifest_text
110 return to_enum(__method__) unless block_given?
111 @text.each_line do |line|
115 line.scan /\S+/ do |token|
117 stream_name = unescape token
118 elsif file_tokens.empty? and Locator.valid? token
119 block_tokens << token
121 file_tokens << unescape(token)
125 next if stream_name.nil?
126 yield [stream_name, block_tokens, file_tokens]
131 # Parse backslash escapes in a Keep manifest stream or file name.
132 s.gsub(/\\(\\|[0-7]{3})/) do |_|
142 def split_file_token token
143 start_pos, filesize, filename = token.split(':', 3)
145 raise ArgumentError.new "Invalid file token '#{token}'"
147 [start_pos.to_i, filesize.to_i, unescape(filename)]
151 return to_enum(__method__) unless block_given?
152 @text.each_line do |line|
154 in_file_tokens = false
155 line.scan /\S+/ do |token|
157 stream_name = unescape token
158 elsif in_file_tokens or not Locator.valid? token
159 in_file_tokens = true
161 file_tokens = split_file_token(token)
162 stream_name_adjuster = ''
163 if file_tokens[2].include?('/') # '/' in filename
164 parts = file_tokens[2].rpartition('/')
165 stream_name_adjuster = parts[1] + parts[0] # /dir_parts
166 file_tokens[2] = parts[2]
169 yield [stream_name + stream_name_adjuster] + file_tokens
178 file_sizes = Hash.new(0)
179 each_file_spec do |streamname, _, filesize, filename|
180 file_sizes[[streamname, filename]] += filesize
182 @files = file_sizes.each_pair.map do |(streamname, filename), size|
183 [streamname, filename, size]
189 def files_count(stop_after=nil)
190 # Return the number of files represented in this manifest.
191 # If stop_after is provided, files_count will read the manifest
192 # incrementally, and return immediately when it counts that number of
193 # files. This can help you avoid parsing the entire manifest if you
194 # just want to check if a small number of files are specified.
195 if stop_after.nil? or not @files.nil?
199 each_file_spec do |streamname, _, _, filename|
200 seen_files[[streamname, filename]] = true
201 return stop_after if (seen_files.size >= stop_after)
207 # Return the total size of all files in this manifest.
208 files.reduce(0) { |total, (_, _, size)| total + size }
211 def exact_file_count?(want_count)
212 files_count(want_count + 1) == want_count
215 def minimum_file_count?(want_count)
216 files_count(want_count) >= want_count
219 def has_file?(want_stream, want_file=nil)
221 want_stream, want_file = File.split(want_stream)
223 each_file_spec do |streamname, _, _, name|
224 if streamname == want_stream and name == want_file
231 # Verify that a given manifest is valid according to
232 # https://arvados.org/projects/arvados/wiki/Keep_manifest_format
233 def self.validate! manifest
234 raise ArgumentError.new "No manifest found" if !manifest
236 return true if manifest.empty?
238 raise ArgumentError.new "Invalid manifest: does not end with newline" if !manifest.end_with?("\n")
240 manifest.each_line do |line|
243 words = line[0..-2].split(/ /)
244 raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing stream name" if words.empty?
249 count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
250 raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
254 while word =~ Locator::LOCATOR_REGEXP
258 raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
261 while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
267 raise ArgumentError.new "Manifest invalid for stream #{line_count}: invalid file token #{word.inspect}"
269 raise ArgumentError.new "Manifest invalid for stream #{line_count}: no file tokens"
272 # Ruby's split() method silently drops trailing empty tokens
273 # (which are not allowed by the manifest format) so we have to
274 # check trailing spaces manually.
275 raise ArgumentError.new "Manifest invalid for stream #{line_count}: trailing space" if line.end_with? " \n"
280 def self.valid? manifest