X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ae8ed6d9d0e5231379564beedc08c812352b7043..40104312e89869bd008c156a2897caf1667e9c10:/sdk/ruby/test/test_keep_manifest.rb

diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb
index f1f1a530ce..f29e978ab8 100644
--- a/sdk/ruby/test/test_keep_manifest.rb
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 require "arvados/keep"
 require "minitest/autorun"
 require "sdk_fixtures"
@@ -58,6 +62,11 @@ class ManifestTest < Minitest::Test
     assert_equal(0, Keep::Manifest.new("").files_count)
   end
 
+  def test_empty_dir_files_count
+    assert_equal(0,
+      Keep::Manifest.new("./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n").files_count)
+  end
+
   def test_empty_files_size
     assert_equal(0, Keep::Manifest.new("").files_size)
   end
@@ -208,4 +217,252 @@ class ManifestTest < Minitest::Test
       assert !file_name.empty?, "empty file_name in #{name} fixture"
     end
   end
+
+  def test_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
+
+  def test_multilevel_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+    expected_sizes = {'.' => 3, './dir1' => 6, './dir1/dir2' => 11}
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(expected_sizes[stream], size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
+
+  [[false, nil],
+   [false, '+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e0+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0 '],
+   [false, "d41d8cd98f00b204e9800998ecf8427e+0\n"],
+   [false, ' d41d8cd98f00b204e9800998ecf8427e+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+K+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e++'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K+'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0++K'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K++'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K++Z'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e', nil,nil,nil],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0', '+0','0',nil],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Fizz+Buzz','+0','0','+Fizz+Buzz'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Fizz+Buzz', nil,nil,'+Fizz+Buzz'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo', '+0','0','+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo', nil,nil,'+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Z', '+0','0','+Z'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Z', nil,nil,'+Z'],
+  ].each do |ok, locator, match2, match3, match4| # row: [should match?, locator, expected captures 2, 3, 4]
+    define_method "test_LOCATOR_REGEXP_on_#{locator.inspect}" do
+      match = Keep::Locator::LOCATOR_REGEXP.match locator
+      assert_equal ok, !!match
+      if ok
+        assert_equal match2, match[2]
+        assert_equal match3, match[3]
+        assert_equal match4, match[4]
+      end
+    end
+    define_method "test_parse_method_on_#{locator.inspect}" do
+      loc = Keep::Locator.parse locator
+      if !ok
+        assert_nil loc
+      else
+        refute_nil loc
+        assert loc.is_a?(Keep::Locator)
+        #assert loc.hash
+        #assert loc.size
+        #assert loc.hints.is_a?(Array)
+      end
+    end
+  end
+
+  [
+    [false, nil, "No manifest found"],
+    [true, ""],
+    [false, " ", "Invalid manifest: does not end with newline"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"], # 2 locators
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.../.foo./.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.bar/baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar./baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 000000000000000000000000000000:0777:foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:0:0\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\040\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff 0:0:empty.txt\n"],
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n"],
+    [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
+     "Invalid manifest: does not end with newline"],
+    [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"abc\""],
+    [false, "abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"abc/./foo\""],
+    [false, "./abc/../foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"./abc/../foo\""],
+    [false, "./abc/. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"./abc/.\""],
+    [false, "./abc/.. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"./abc/..\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"./abc/./foo\""],
+    # non-empty '.'-named file tokens aren't acceptable. Empty ones are used as empty dir placeholders.
+    [false, ". 8cf8463b34caa8ac871a52d5dd7ad1ef+1 0:1:.\n",
+     "invalid file token \"0:1:.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
+     "invalid file token \"0:0:..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
+     "invalid file token \"0:0:./abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:../abc.txt\n",
+     "invalid file token \"0:0:../abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/.\n",
+     "invalid file token \"0:0:abc.txt/.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/..\n",
+     "invalid file token \"0:0:abc.txt/..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/./bc.txt\n",
+     "invalid file token \"0:0:a/./bc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/../bc.txt\n",
+     "invalid file token \"0:0:a/../bc.txt\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+     "invalid stream name \"./abc/./foo\""],
+    [false, "d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n",
+     "invalid stream name \"d41d8cd98f00b204e9800998ecf8427e+0\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427 0:0:abc.txt\n",
+     "invalid locator \"d41d8cd98f00b204e9800998ecf8427\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e\n",
+     "Manifest invalid for stream 1: no file tokens"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n/dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
+     "Manifest invalid for stream 2: missing or invalid stream name \"/dir1\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
+     "Manifest invalid for stream 2: missing or invalid locator \"d41d8cd98f00b204e9800998ecf842\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 abc.txt\n",
+     "Manifest invalid for stream 2: invalid file token \"abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 0:abc.txt\n",
+     "Manifest invalid for stream 2: invalid file token \"0:abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt xyz.txt\n",
+     "Manifest invalid for stream 2: invalid file token \"xyz.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt d41d8cd98f00b204e9800998ecf8427e+0\n",
+     "Manifest invalid for stream 1: invalid file token \"d41d8cd98f00b204e9800998ecf8427e+0\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0\n",
+     "Manifest invalid for stream 1: no file tokens"],
+    [false, ". 0:0:foo.txt d41d8cd98f00b204e9800998ecf8427e+0\n",
+     "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
+    [false, ". 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
+    [false, ".\n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, ".", "Invalid manifest: does not end with newline"],
+    [false, ". \n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, ".  \n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, " . d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt \n",
+     "stream 1: trailing space"],
+    # TAB and other tricky whitespace characters:
+    [false, "\v. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"\\v."],
+    [false, "./foo\vbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\vbar"],
+    [false, "\t. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"\\t"],
+    [false, ".\td41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \".\\t"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\t\n",
+     "stream 1: invalid file token \"0:0:foo.txt\\t\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0\t 0:0:foo.txt\n",
+     "stream 1: missing or invalid locator \"d41d8cd98f00b204e9800998ecf8427e+0\\t\""],
+    [false, "./foo\tbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "stream 1: missing or invalid stream name \"./foo\\tbar\""],
+    # other whitespace errors:
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0  0:0:foo.txt\n", # doubled space after the locator yields an empty token
+     "Manifest invalid for stream 1: invalid file token \"\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n \n",
+     "Manifest invalid for stream 2: missing stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n\n",
+     "Manifest invalid for stream 2: missing stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n ",
+     "Invalid manifest: does not end with newline"],
+    [false, "\n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing stream name"],
+    [false, " \n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing stream name"],
+    # empty file and stream name components:
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:/foo.txt\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:/foo.txt\""],
+    [false, "./ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"./\""],
+    [false, ".//foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \".//foo\""],
+    [false, "./foo/ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"./foo/\""],
+    [false, "./foo//bar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \"./foo//bar\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo//bar.txt\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+    # escaped chars
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"],
+    [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""],
+    [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+     "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057/Data\""],
+    [true, "./Data\\040Folder d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\057foo/bar\n",
+     "Manifest invalid for stream 1: invalid file token \"0:0:\\\\057foo/bar\""],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134057foo/bar\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 \\040:\\040:foo.txt\n"],
+  ].each do |ok, manifest, expected_error=nil| # row: [valid?, manifest text, optional substring of the expected error]
+    define_method "test_validate manifest #{manifest.inspect}" do
+      assert_equal ok, Keep::Manifest.valid?(manifest)
+      if ok
+        assert Keep::Manifest.validate! manifest
+      else # validate! must raise ArgumentError; its message is compared only when the row supplies one
+        begin
+          Keep::Manifest.validate! manifest
+        rescue ArgumentError => e
+          msg = e.message
+        end
+        refute_nil msg, "Expected ArgumentError"
+        assert expected_error.nil? || msg.include?(expected_error), "Did not find expected error message. Expected: #{expected_error}; Actual: #{msg}"
+      end
+    end
+  end
 end