end
class Manifest
- STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
- STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
+ # Validation patterns for manifest tokens. They are anchored with \A and \z
+ # (whole string) instead of ^ and $ (per line): an unescaped name may
+ # contain a newline (presumably produced from an escape such as \012), and
+ # line anchors would let one conforming line validate an otherwise
+ # non-conforming name.
+ #
+ # Raw stream token: one or more characters outside \000-\040 other than a
+ # backslash, or octal escapes in the range \000-\377.
+ STREAM_TOKEN_REGEXP = /\A([^\000-\040\\]|\\[0-3][0-7][0-7])+\z/
+ # Unescaped stream name: '.' followed by zero or more '/component' parts.
+ STREAM_NAME_REGEXP = /\A(\.)(\/[^\/]+)*\z/
+
+ EMPTY_DIR_TOKEN_REGEXP = /\A0:0:\.\z/ # The exception when a file can have '.' as a name
+ # Raw file token: "<digits>:<digits>:" followed by the same character set
+ # that STREAM_TOKEN_REGEXP accepts.
+ FILE_TOKEN_REGEXP = /\A[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+\z/
+ # Unescaped file token: the name (capture 1) is one or more non-empty
+ # components separated by single slashes.
+ FILE_NAME_REGEXP = /\A[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)\z/
+
+ # An octal escape whose first digit is 4-7 (i.e. above \377), preceded by a
+ # character that is not a backslash.
+ NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/
# Class to parse a manifest text and provide common views of that data.
def initialize(manifest_text)
end
end
- def unescape(s)
+ def self.unescape(s)
+ return nil if s.nil?
+
# Parse backslash escapes in a Keep manifest stream or file name.
s.gsub(/\\(\\|[0-7]{3})/) do |_|
case $1
end
end
+ def unescape(s) # Instance-level shortcut: forwards s to the class-level unescape.
+ self.class.unescape(s)
+ end
+
def split_file_token token
start_pos, filesize, filename = token.split(':', 3)
if filename.nil?
elsif in_file_tokens or not Locator.valid? token
in_file_tokens = true
- file_tokens = split_file_token(token)
+ start_pos, file_size, file_name = split_file_token(token)
stream_name_adjuster = ''
- if file_tokens[2].include?('/') # '/' in filename
- parts = file_tokens[2].rpartition('/')
- stream_name_adjuster = parts[1] + parts[0] # /dir_parts
- file_tokens[2] = parts[2]
+ if file_name.include?('/') # '/' in filename
+ dirname, sep, basename = file_name.rpartition('/')
+ stream_name_adjuster = sep + dirname # /dir_parts
+ file_name = basename
end
- yield [stream_name + stream_name_adjuster] + file_tokens
+ yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
end
end
end
# files. This can help you avoid parsing the entire manifest if you
# just want to check if a small number of files are specified.
if stop_after.nil? or not @files.nil?
- return files.size
+ # Avoid counting empty dir placeholders
+ return files.reject{|_, name, size| name == '.' and size == 0}.size
end
seen_files = {}
- each_file_spec do |streamname, _, _, filename|
+ each_file_spec do |streamname, _, filesize, filename|
+ # Avoid counting empty dir placeholders
+ next if filename == "." and filesize == 0
seen_files[[streamname, filename]] = true
return stop_after if (seen_files.size >= stop_after)
end
count = 0
word = words.shift
- count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+ unescaped_word = unescape(word)
+ count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
count = 0
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
count = 0
- while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+ raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+ while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or
+ (word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
word = words.shift
count += 1
end
assert_equal(0, Keep::Manifest.new("").files_count)
end
+ def test_empty_dir_files_count
+ assert_equal(0,
+ Keep::Manifest.new("./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n").files_count)
+ end
+
def test_empty_files_size
assert_equal(0, Keep::Manifest.new("").files_size)
end
[true, ". 00000000000000000000000000000000+0 0:0:0\n"],
[true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
[true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff 0:0:empty.txt\n"],
+ [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n"],
[false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
"Invalid manifest: does not end with newline"],
[false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
"invalid stream name \"./abc/..\""],
[false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
"invalid stream name \"./abc/./foo\""],
- [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
- "invalid file token \"0:0:.\""],
+ # non-empty '.'-named file tokens aren't acceptable. Empty ones are used as empty dir placeholders.
+ [false, ". 8cf8463b34caa8ac871a52d5dd7ad1ef+1 0:1:.\n",
+ "invalid file token \"0:1:.\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
"invalid file token \"0:0:..\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
"Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
[false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
"Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+ # escaped chars
+ [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"],
+ [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\""],
+ [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\\057foo\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\\\\057foo\""],
+ [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0\\0720\\072foo\n",
+ "Manifest invalid for stream 1: invalid file token \"0\\\\0720\\\\072foo\""],
+ [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 \\060:\\060:foo\n",
+ "Manifest invalid for stream 1: invalid file token \"\\\\060:\\\\060:foo\""],
+ [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"],
+ [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\072\n"],
+ [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [true, "\\056\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [true, "./\\134444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [false, "./\\\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\\\\\444\""],
+ [true, "./\\011foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [false, "./\\011/.. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\011/..\""],
+ [false, ".\\056\\057 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \".\\\\056\\\\057\""],
+ [false, ".\\057\\056 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057\\\\056\""],
+ [false, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+ "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+ [false, "./\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./\\\\444\""],
+ [false, "./\tfoo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./\\tfoo\""],
+ [false, "./foo\\ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\\""],
+ [false, "./foo\\r d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\r\""],
+ [false, "./foo\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./foo\\\\444\""],
+ [false, "./foo\\888 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\888\""],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\\""],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\r\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\r\""],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+ "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\888\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\888\""],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""],
+ [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+ "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057/Data\""],
+ [true, "./Data\\040Folder d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\057foo/bar\n",
+ "Manifest invalid for stream 1: invalid file token \"0:0:\\\\057foo/bar\""],
+ [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134057foo/bar\n"],
+ [false, ". d41d8cd98f00b204e9800998ecf8427e+0 \\040:\\040:foo.txt\n",
+ "Manifest invalid for stream 1: invalid file token \"\\\\040:\\\\040:foo.txt\""],
].each do |ok, manifest, expected_error=nil|
define_method "test_validate manifest #{manifest.inspect}" do
assert_equal ok, Keep::Manifest.valid?(manifest)