Merge branch '4523-full-text-search' into 5110-workbench-full-text-search
[arvados.git] / sdk / ruby / test / test_keep_manifest.rb
1 require "minitest/autorun"
2 require "arvados/keep"
3 require "yaml"
4
# Build a throwaway block locator string for use in test manifests:
# 32 zero-padded lowercase hex digits, a "+", then a decimal size.
# When no size is given, a random one below 64 * 1024 * 1024 is used.
def random_block(size=nil)
  digest = rand(16 ** 32)
  byte_count = size || rand(64 * 1024 * 1024)
  format("%032x+%d", digest, byte_count)
end
8
# Tests for Keep::Manifest: line-by-line iteration (each_line), file
# spec iteration (each_file_spec), the aggregated file list (files),
# file counting predicates, and has_file? lookups.
class ManifestTest < Minitest::Test
  # One stream containing one block and one 9-byte file.
  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"
  # "repfile" appears on two separate lines of stream "." (4 bytes and
  # 1 byte) and again in stream "./s1"; the "uniq" files appear once.
  MULTIBLOCK_FILE_MANIFEST =
    [". #{random_block(8)} 0:4:repfile 4:4:uniqfile",
     "./s1 #{random_block(6)} 0:3:repfile 3:3:uniqfile",
     ". #{random_block(8)} 0:7:uniqfile2 7:1:repfile\n"].join("\n")
  # Four distinct streams, each holding file1..file3 at 3 bytes apiece.
  MULTILEVEL_MANIFEST =
    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")

  # Assert that one each_line result (stream name first, block list
  # second, file spec list last) matches the expected values. Blocks
  # are compared by their to_s form.
  def check_stream(stream, exp_name, exp_blocks, exp_files)
    assert_equal(exp_name, stream.first)
    assert_equal(exp_blocks, stream[1].map(&:to_s))
    assert_equal(exp_files, stream.last)
  end

  # each_line without a block returns something enumerable via to_a.
  def test_simple_each_line_array
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    stream_name, block_s, file = SIMPLEST_MANIFEST.strip.split
    stream_a = manifest.each_line.to_a
    assert_equal(1, stream_a.size, "wrong number of streams")
    check_stream(stream_a.first, stream_name, [block_s], [file])
  end

  # each_line with a block yields the file spec list as its third value.
  def test_simple_each_line_block
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    result = []
    manifest.each_line do |_stream, _blocks, files|
      result << files
    end
    assert_equal([[SIMPLEST_MANIFEST.split.last]], result,
                 "wrong result from each_line block")
  end

  # Every stream is yielded exactly once, each with three files.
  def test_multilevel_each_line
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = []
    manifest.each_line do |stream, _blocks, files|
      refute(seen.include?(stream),
             "each_line already yielded stream #{stream}")
      seen << stream
      assert_equal(3, files.size, "wrong file count for stream #{stream}")
    end
    assert_equal(4, seen.size, "wrong number of streams")
  end

  # The following tests pin down behavior for an empty manifest.
  def test_empty_each_line
    assert_empty(Keep::Manifest.new("").each_line.to_a)
  end

  def test_empty_each_file_spec
    assert_empty(Keep::Manifest.new("").each_file_spec.to_a)
  end

  def test_empty_files
    assert_empty(Keep::Manifest.new("").files)
  end

  def test_empty_files_count
    assert_equal(0, Keep::Manifest.new("").files_count)
  end

  def test_empty_has_file?
    refute(Keep::Manifest.new("").has_file?(""))
  end

  # A blank line between two stream lines must not produce a stream.
  def test_empty_line_within_manifest
    block_s = random_block
    manifest = Keep::Manifest.
      new([". #{block_s} 0:1:file1 1:2:file2\n",
           "\n",
           ". #{block_s} 3:3:file3 6:4:file4\n"].join(""))
    streams = manifest.each_line.to_a
    assert_equal(2, streams.size)
    check_stream(streams[0], ".", [block_s], ["0:1:file1", "1:2:file2"])
    check_stream(streams[1], ".", [block_s], ["3:3:file3", "6:4:file4"])
  end

  # Expected decoding: \040 becomes a space, \011 becomes a tab, a
  # doubled backslash becomes a single one, and the backslash in "\h"
  # (not an octal escape or a doubled backslash) is left as-is.
  def test_backslash_escape_parsing
    m_text = "./dir\\040name #{random_block} 0:0:file\\\\name\\011\\here.txt\n"
    manifest = Keep::Manifest.new(m_text)
    streams = manifest.each_line.to_a
    assert_equal(1, streams.size, "wrong number of streams with whitespace")
    assert_equal("./dir name", streams.first.first,
                 "wrong stream name with whitespace")
    assert_equal(["0:0:file\\name\t\\here.txt"], streams.first.last,
                 "wrong filename(s) with whitespace")
  end

  # files returns [stream name, file name, size] triples.
  def test_simple_files
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert_equal([[".", "simple.txt", 9]], manifest.files)
  end

  # No stream/file pair is repeated, every file is 3 bytes, and every
  # stream lists exactly file1..file3.
  def test_multilevel_files
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = Hash.new { |this, key| this[key] = [] }
    manifest.files.each do |stream, basename, size|
      refute(seen[stream].include?(basename),
             "each_file repeated #{stream}/#{basename}")
      seen[stream] << basename
      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
    end
    # Without this check the test would pass vacuously if files
    # returned nothing, since every other assertion is inside a loop.
    assert_equal(4, seen.size, "wrong number of streams")
    seen.each_pair do |stream, basenames|
      assert_equal(%w(file1 file2 file3), basenames.sort,
                   "wrong file list for #{stream}")
    end
  end

  # Only the first two colons of a file spec are separators.
  def test_files_with_colons_in_names
    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:file:test.txt\n")
    assert_equal([[".", "file:test.txt", 9]], manifest.files)
  end

  # Only \040 is decoded (to a space) in the expected name; the
  # expected result is "a a.txt" because \141 is the octal code for "a".
  def test_files_with_escape_sequence_in_filename
    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:a\\040\\141.txt\n")
    assert_equal([[".", "a a.txt", 9]], manifest.files)
  end

  # "./repfile" is listed on two manifest lines (4 bytes + 1 byte), so
  # files must report it once with the combined size of 5.
  def test_files_spanning_multiple_blocks
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert_equal([[".", "repfile", 5],
                  [".", "uniqfile", 4],
                  [".", "uniqfile2", 7],
                  ["./s1", "repfile", 3],
                  ["./s1", "uniqfile", 3]],
                 manifest.files.sort)
  end

  def test_minimum_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.minimum_file_count?(1), "real minimum file count false")
    refute(manifest.minimum_file_count?(2), "fake minimum file count true")
  end

  # The multiblock manifest holds five distinct files.
  def test_minimum_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.minimum_file_count?(2), "low minimum file count false")
    assert(manifest.minimum_file_count?(5), "real minimum file count false")
    refute(manifest.minimum_file_count?(6), "fake minimum file count true")
  end

  def test_exact_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.exact_file_count?(1), "exact file count false")
    refute(manifest.exact_file_count?(0), "-1 file count true")
    refute(manifest.exact_file_count?(2), "+1 file count true")
  end

  def test_exact_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.exact_file_count?(5), "exact file count false")
    refute(manifest.exact_file_count?(4), "-1 file count true")
    refute(manifest.exact_file_count?(6), "+1 file count true")
  end

  # has_file? accepts either a single full path or a stream name plus
  # a file name.
  def test_has_file
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.has_file?("./repfile"), "one-arg repfile not found")
    assert(manifest.has_file?(".", "repfile"), "two-arg repfile not found")
    assert(manifest.has_file?("./s1/repfile"), "one-arg s1/repfile not found")
    assert(manifest.has_file?("./s1", "repfile"), "two-arg s1/repfile not found")
    refute(manifest.has_file?("./s1/uniqfile2"), "one-arg missing file found")
    refute(manifest.has_file?("./s1", "uniqfile2"), "two-arg missing file found")
    refute(manifest.has_file?("./s2/repfile"), "one-arg missing stream found")
    refute(manifest.has_file?("./s2", "repfile"), "two-arg missing stream found")
  end

  # Lookups use the decoded name ("a b c"), not the escaped manifest
  # text ("a\040b\040c").
  def test_has_file_with_spaces
    manifest = Keep::Manifest.new(". #{random_block(3)} 0:3:a\\040b\\040c\n")
    assert(manifest.has_file?("./a b c"), "one-arg 'a b c' not found")
    assert(manifest.has_file?(".", "a b c"), "two-arg 'a b c' not found")
    refute(manifest.has_file?("a\\040b\\040c"), "one-arg unescaped found")
    refute(manifest.has_file?(".", "a\\040b\\040c"), "two-arg unescaped found")
  end

  # Every manifest in the "collections" fixture file must parse.
  def test_parse_all_fixtures
    fixtures('collections').each do |name, collection|
      parse_collection_manifest name, collection
    end
  end

  # Guards against parse_collection_manifest silently accepting
  # anything: a malformed manifest must raise ArgumentError.
  def test_raise_on_bogus_fixture
    assert_raises ArgumentError do
      parse_collection_manifest('bogus collection',
                                {'manifest_text' => ". zzz 0:\n"})
    end
  end

  # Parse collection['manifest_text'] and check the type and
  # non-emptiness of every value yielded by each_file_spec.
  # name is used only in failure messages.
  def parse_collection_manifest(name, collection)
    manifest = Keep::Manifest.new(collection['manifest_text'])
    manifest.each_file_spec do |stream_name, start_pos, file_size, file_name|
      assert_kind_of String, stream_name
      assert_kind_of Integer, start_pos
      assert_kind_of Integer, file_size
      assert_kind_of String, file_name
      refute_empty stream_name, "empty stream_name in #{name} fixture"
      refute_empty file_name, "empty file_name in #{name} fixture"
    end
  end

  # Parsed fixture data, keyed by fixture name, shared by all tests.
  @@fixtures = {}

  # Load and parse services/api/test/fixtures/<name>.yml (located
  # relative to this file). If the file contains the marker comment
  # '# Test Helper trims the rest of the file', everything from the
  # marker onward is dropped before parsing.
  #
  # Results are cached per name, so asking for a second fixture file
  # does not return the data of the first one requested.
  def fixtures(name)
    return @@fixtures[name] if @@fixtures.key?(name)
    path = File.expand_path("../../../../services/api/test/fixtures/#{name}.yml",
                            __FILE__)
    file = File.read(path)
    trim_index = file.index('# Test Helper trims the rest of the file')
    file = file[0, trim_index] if trim_index
    @@fixtures[name] = YAML.load(file)
  end
end