Merge branch 'master' into 5675-project-subprojects-in-anonymous-view
[arvados.git] / sdk / ruby / test / test_keep_manifest.rb
1 require "arvados/keep"
2 require "minitest/autorun"
3 require "sdk_fixtures"
4
# Unit tests for Keep::Manifest: stream iteration (each_line), per-file
# queries (files, files_count, files_size, has_file?), file-count predicates,
# and parsing of collection fixtures.
#
# The manifest constants (SIMPLEST_MANIFEST, MULTILEVEL_MANIFEST, ...) and the
# random_block / fixtures helpers are not defined in this file; presumably
# they are supplied by SDKFixtures.
class ManifestTest < Minitest::Test
  include SDKFixtures

  # Assert that one parsed stream matches expectations.
  #
  # stream     - a stream as yielded by Keep::Manifest#each_line; its first
  #              element is compared to exp_name, its second element (mapped
  #              through to_s) to exp_blocks, and its last to exp_files.
  # exp_name   - expected stream name.
  # exp_blocks - expected blocks, as strings.
  # exp_files  - expected file tokens.
  def check_stream(stream, exp_name, exp_blocks, exp_files)
    assert_equal(exp_name, stream.first)
    assert_equal(exp_blocks, stream[1].map(&:to_s))
    assert_equal(exp_files, stream.last)
  end

  # each_line without a block, collected via to_a, yields exactly one stream
  # whose parts equal the whitespace-separated tokens of the raw manifest.
  def test_simple_each_line_array
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    stream_name, block_s, file = SIMPLEST_MANIFEST.strip.split
    stream_a = manifest.each_line.to_a
    assert_equal(1, stream_a.size, "wrong number of streams")
    check_stream(stream_a.first, stream_name, [block_s], [file])
  end

  # each_line with a block yields (stream, blocks, files); only the files
  # element is inspected here.
  def test_simple_each_line_block
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    result = []
    manifest.each_line do |_stream, _blocks, files|
      result << files
    end
    assert_equal([[SIMPLEST_MANIFEST.split.last]], result,
                 "wrong result from each_line block")
  end

  # Every stream is yielded once, carries three files, and the number of
  # streams equals the number of newline-terminated lines in the manifest.
  def test_multilevel_each_line
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = []
    manifest.each_line do |stream, _blocks, files|
      refute_includes(seen, stream,
                      "each_line already yielded stream #{stream}")
      seen << stream
      assert_equal(3, files.size, "wrong file count for stream #{stream}")
    end
    assert_equal(MULTILEVEL_MANIFEST.count("\n"), seen.size,
                 "wrong number of streams")
  end

  # The following tests pin the behavior of an empty manifest ("").

  def test_empty_each_line
    assert_empty(Keep::Manifest.new("").each_line.to_a)
  end

  def test_empty_each_file_spec
    assert_empty(Keep::Manifest.new("").each_file_spec.to_a)
  end

  def test_empty_files
    assert_empty(Keep::Manifest.new("").files)
  end

  def test_empty_files_count
    assert_equal(0, Keep::Manifest.new("").files_count)
  end

  def test_empty_files_size
    assert_equal(0, Keep::Manifest.new("").files_size)
  end

  def test_empty_has_file?
    refute(Keep::Manifest.new("").has_file?(""))
  end

  # A blank line between two stream lines must be skipped rather than
  # reported as a stream of its own.
  def test_empty_line_within_manifest
    block_s = random_block
    manifest = Keep::Manifest.
      new([". #{block_s} 0:1:file1 1:2:file2\n",
           "\n",
           ". #{block_s} 3:3:file3 6:4:file4\n"].join(""))
    streams = manifest.each_line.to_a
    assert_equal(2, streams.size)
    check_stream(streams[0], ".", [block_s], ["0:1:file1", "1:2:file2"])
    check_stream(streams[1], ".", [block_s], ["3:3:file3", "6:4:file4"])
  end

  # Escapes in the stream name and file token are decoded by each_line: the
  # stream name contains a literal space, the file token literal backslashes
  # and a tab.
  def test_backslash_escape_parsing
    manifest = Keep::Manifest.new(MANY_ESCAPES_MANIFEST)
    streams = manifest.each_line.to_a
    assert_equal(1, streams.size, "wrong number of streams with whitespace")
    assert_equal("./dir name", streams.first.first,
                 "wrong stream name with whitespace")
    assert_equal(["0:9:file\\name\t\\here.txt"], streams.first.last,
                 "wrong filename(s) with whitespace")
  end

  # files returns [stream, basename, size] triples.
  def test_simple_files
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert_equal([[".", "simple.txt", 9]], manifest.files)
  end

  # Each stream lists file1..file3 exactly once, each of size 3.
  def test_multilevel_files
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = collect_files_by_stream(manifest) do |stream, basename, size|
      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
    end
    # Guard against a vacuous pass: without this, an empty files list would
    # skip every assertion in this test.
    refute_empty(seen, "no files reported for multilevel manifest")
    seen.each_pair do |stream, basenames|
      assert_equal(%w[file1 file2 file3], basenames.sort,
                   "wrong file list for #{stream}")
    end
  end

  # Only the first two colons of a file token are separators; later colons
  # belong to the file name.
  def test_files_with_colons_in_names
    manifest = Keep::Manifest.new(COLON_FILENAME_MANIFEST)
    assert_equal([[".", "file:test.txt", 9]], manifest.files)
  end

  # files reports the unescaped file name.
  def test_files_with_escape_sequence_in_filename
    manifest = Keep::Manifest.new(ESCAPED_FILENAME_MANIFEST)
    assert_equal([[".", "a a.txt", 9]], manifest.files)
  end

  # A file name repeated within a stream is reported once with a combined
  # size; the same name in a different stream is a separate entry.
  def test_files_spanning_multiple_blocks
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert_equal([[".", "repfile", 5],
                  [".", "uniqfile", 4],
                  [".", "uniqfile2", 7],
                  ["./s1", "repfile", 3],
                  ["./s1", "uniqfile", 3]],
                 manifest.files.sort)
  end

  def test_minimum_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.minimum_file_count?(1), "real minimum file count false")
    refute(manifest.minimum_file_count?(2), "fake minimum file count true")
  end

  def test_minimum_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.minimum_file_count?(2), "low minimum file count false")
    assert(manifest.minimum_file_count?(5), "real minimum file count false")
    refute(manifest.minimum_file_count?(6), "fake minimum file count true")
  end

  def test_exact_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.exact_file_count?(1), "exact file count false")
    refute(manifest.exact_file_count?(0), "-1 file count true")
    refute(manifest.exact_file_count?(2), "+1 file count true")
  end

  def test_exact_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.exact_file_count?(5), "exact file count false")
    refute(manifest.exact_file_count?(4), "-1 file count true")
    refute(manifest.exact_file_count?(6), "+1 file count true")
  end

  def test_files_size_multiblock
    assert_equal(22, Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST).files_size)
  end

  # f1 and f2 overlap at offset 5 and neither covers offsets 0-2; the
  # expected total is still the sum of the declared sizes (3 + 3).
  def test_files_size_with_skipped_overlapping_data
    manifest = Keep::Manifest.new(". #{random_block(9)} 3:3:f1 5:3:f2\n")
    assert_equal(6, manifest.files_size)
  end

  # has_file? accepts either a full path or a (stream, basename) pair.
  def test_has_file
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.has_file?("./repfile"), "one-arg repfile not found")
    assert(manifest.has_file?(".", "repfile"), "two-arg repfile not found")
    assert(manifest.has_file?("./s1/repfile"), "one-arg s1/repfile not found")
    assert(manifest.has_file?("./s1", "repfile"), "two-arg s1/repfile not found")
    refute(manifest.has_file?("./s1/uniqfile2"), "one-arg missing file found")
    refute(manifest.has_file?("./s1", "uniqfile2"), "two-arg missing file found")
    refute(manifest.has_file?("./s2/repfile"), "one-arg missing stream found")
    refute(manifest.has_file?("./s2", "repfile"), "two-arg missing stream found")
  end

  # has_file? matches unescaped names and must not match the escaped form.
  def test_has_file_with_spaces
    manifest = Keep::Manifest.new(ESCAPED_FILENAME_MANIFEST)
    assert(manifest.has_file?("./a a.txt"), "one-arg path not found")
    assert(manifest.has_file?(".", "a a.txt"), "two-arg path not found")
    # NOTE(review): unlike every other one-arg call in this file, this path
    # has no "./" stream prefix, so the refutation may hold trivially --
    # confirm against the ESCAPED_FILENAME_MANIFEST fixture.
    refute(manifest.has_file?("a\\040\\141"), "one-arg unescaped found")
    refute(manifest.has_file?(".", "a\\040\\141"), "two-arg unescaped found")
  end

  # Every 'collections' fixture must parse into well-formed file specs.
  def test_parse_all_fixtures
    fixtures('collections').each do |name, collection|
      parse_collection_manifest(name, collection)
    end
  end

  # A manifest with a malformed block and file token is expected to raise
  # ArgumentError while its file specs are enumerated.
  def test_raise_on_bogus_fixture
    assert_raises(ArgumentError) do
      parse_collection_manifest('bogus collection',
                                {'manifest_text' => ". zzz 0:\n"})
    end
  end

  # Parse a collection's manifest and assert every file spec is well formed.
  #
  # name       - fixture name, used only in failure messages.
  # collection - hash-like object whose 'manifest_text' entry holds the
  #              manifest to parse.
  #
  # Any exception raised by Keep::Manifest propagates to the caller.
  def parse_collection_manifest(name, collection)
    manifest = Keep::Manifest.new(collection['manifest_text'])
    manifest.each_file_spec do |stream_name, start_pos, file_size, file_name|
      assert_kind_of(String, stream_name)
      assert_kind_of(Integer, start_pos)
      assert_kind_of(Integer, file_size)
      assert_kind_of(String, file_name)
      refute_empty(stream_name, "empty stream_name in #{name} fixture")
      refute_empty(file_name, "empty file_name in #{name} fixture")
    end
  end

  # Directory components in file names are reported as part of the stream:
  # each of the three resulting streams holds a single file1 of size 3.
  def test_collection_with_dirs_in_filenames
    manifest = Keep::Manifest.new(MANIFEST_WITH_DIRS_IN_FILENAMES)

    seen = collect_files_by_stream(manifest) do |stream, basename, size|
      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
    end

    assert_equal(%w[. ./dir1 ./dir1/dir2], seen.keys)

    seen.each_pair do |stream, basenames|
      assert_equal(%w[file1], basenames.sort, "wrong file list for #{stream}")
    end
  end

  # Same as above, but file1 has a different expected size in each stream.
  def test_multilevel_collection_with_dirs_in_filenames
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES)
    expected_sizes = {'.' => 3, './dir1' => 6, './dir1/dir2' => 11}

    seen = collect_files_by_stream(manifest) do |stream, basename, size|
      assert_equal(expected_sizes[stream], size, "wrong size for #{stream}/#{basename}")
    end

    assert_equal(%w[. ./dir1 ./dir1/dir2], seen.keys)

    seen.each_pair do |stream, basenames|
      assert_equal(%w[file1], basenames.sort, "wrong file list for #{stream}")
    end
  end

  private

  # Walk manifest.files, asserting that no (stream, basename) pair repeats,
  # and yield each (stream, basename, size) triple so the caller can make
  # further assertions on it.
  #
  # Returns a Hash mapping each stream name to the basenames seen in it, in
  # the order the streams were first encountered.
  def collect_files_by_stream(manifest)
    seen = Hash.new { |hash, key| hash[key] = [] }
    manifest.files.each do |stream, basename, size|
      refute_includes(seen[stream], basename,
                      "each_file repeated #{stream}/#{basename}")
      yield stream, basename, size
      seen[stream] << basename
    end
    seen
  end
end