From: Lucas Di Pentima Date: Thu, 17 Jan 2019 23:53:58 +0000 (-0300) Subject: 14482: Manifest validation including 8-bit escaped chars; with tests. X-Git-Tag: 1.4.0~172^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/75487f9f9c705186e41221ff0c52474edbf5a9d0 14482: Manifest validation including 8-bit escaped chars; with tests. Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima --- diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb index b8abf0f7cc..782b274b0a 100644 --- a/sdk/ruby/lib/arvados/keep.rb +++ b/sdk/ruby/lib/arvados/keep.rb @@ -101,9 +101,14 @@ module Keep end class Manifest - STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\t\v\n\r]+)*$/ - STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\t\v\n\r\/]+(\/[^\t\v\n\r\/]+)*)$/ - EMPTY_DOT_FILE_TOKEN_REGEXP = /^0:0:\.$/ + STREAM_TOKEN_REGEXP = /^[^\000-\040]+$/ + STREAM_NAME_REGEXP = /^(\.)(\/[^\/]+)*$/ + + EMPTY_DIR_TOKEN_REGEXP = /^0:0:\.$/ # The exception when a file can have '.' as a name + FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:[^\000-\040]+$/ + FILE_NAME_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)$/ + + NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/ # Class to parse a manifest text and provide common views of that data. def initialize(manifest_text) @@ -260,8 +265,9 @@ module Keep count = 0 word = words.shift + raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR unescaped_word = unescape(word) - count += 1 if unescaped_word =~ STRICT_STREAM_TOKEN_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/ + count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/ raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1 count = 0 @@ -273,8 +279,9 @@ module Keep raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0 count = 0 - while unescape(word) =~ EMPTY_DOT_FILE_TOKEN_REGEXP or - (unescape(word) =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..', '.']).empty?) + raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR + while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or + (word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?) word = words.shift count += 1 end diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb index 637c998bd5..813b3ed82b 100644 --- a/sdk/ruby/test/test_keep_manifest.rb +++ b/sdk/ruby/test/test_keep_manifest.rb @@ -438,8 +438,33 @@ class ManifestTest < Minitest::Test "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""], # escaped chars [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"], + [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\n", + "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\""], + [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\\057foo\n", + "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\\\\057foo\""], + [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0\\0720\\072foo\n", + "Manifest invalid for stream 1: invalid file token \"0\\\\0720\\\\072foo\""], + [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 \\060:\\060:foo\n", + "Manifest invalid for stream 1: invalid file token \"\\\\060:\\\\060:foo\""], [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"], + [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\072\n"], [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"], + [true, "\\056\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"], + [true, "./\\134444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"], + [true, "./\\\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"], + [true, "./\\011foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"], + [false, "./\\011/.. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\011/..\""], + [false, ".\\056\\057 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "Manifest invalid for stream 1: missing or invalid stream name \".\\\\056\\\\057\""], + [false, ".\\057\\056 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057\\\\056\""], + [false, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n", + "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""], + [false, "./\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./\\\\444\""], + [false, "./\tfoo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "Manifest invalid for stream 1: missing or invalid stream name \"./\\tfoo\""], [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n", "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""], [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",