5176: Add more Ruby SDK tests for empty manifests.
[arvados.git] / sdk / ruby / test / test_keep_manifest.rb
1 require "minitest/autorun"
2 require "arvados/keep"
3
# Build a throwaway block token of the form "<32 hex digits>+<size>".
#
# The hex part is drawn at random on every call. When +size+ is nil (or
# false) the size is drawn at random too, below 64 MiB; otherwise the given
# size is used as-is.
def random_block(size=nil)
  hex_value = rand(16 ** 32)
  byte_count = size || rand(64 * 1024 * 1024)
  format("%032x+%d", hex_value, byte_count)
end
7
# Tests for Keep::Manifest: line iteration (each_line), file listing (files),
# file counting helpers, and has_file? lookups.
class ManifestTest < Minitest::Test
  # One stream, one block, one 9-byte file.
  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"
  # Three manifest lines. The "." stream appears on two lines and "repfile"
  # is listed on both of them (4 bytes + 1 byte), so files that span several
  # lines must be merged when listing or counting.
  MULTIBLOCK_FILE_MANIFEST =
    [". #{random_block(8)} 0:4:repfile 4:4:uniqfile",
     "./s1 #{random_block(6)} 0:3:repfile 3:3:uniqfile",
     ". #{random_block(8)} 0:7:uniqfile2 7:1:repfile\n"].join("\n")
  # Four distinct streams, each holding the same three 3-byte files.
  MULTILEVEL_MANIFEST =
    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")

  # Assert that one each_line entry (name, blocks, file tokens) matches the
  # expected stream name, block strings, and file tokens.
  def check_stream(stream, exp_name, exp_blocks, exp_files)
    assert_equal(exp_name, stream.first)
    assert_equal(exp_blocks, stream[1].map(&:to_s))
    assert_equal(exp_files, stream.last)
  end

  # each_line without a block returns something enumerable over the streams.
  def test_simple_each_line_array
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    stream_name, block_s, file = SIMPLEST_MANIFEST.strip.split
    stream_a = manifest.each_line.to_a
    assert_equal(1, stream_a.size, "wrong number of streams")
    check_stream(stream_a.first, stream_name, [block_s], [file])
  end

  # each_line with a block yields (stream, blocks, files) per manifest line.
  def test_simple_each_line_block
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    result = []
    manifest.each_line do |_stream, _blocks, files|
      result << files
    end
    assert_equal([[SIMPLEST_MANIFEST.split.last]], result,
                 "wrong result from each_line block")
  end

  # Every stream of the multilevel manifest is yielded exactly once.
  def test_multilevel_each_line
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = []
    manifest.each_line do |stream, _blocks, files|
      refute(seen.include?(stream),
             "each_line already yielded stream #{stream}")
      seen << stream
      assert_equal(3, files.size, "wrong file count for stream #{stream}")
    end
    assert_equal(4, seen.size, "wrong number of streams")
  end

  # The following tests pin the behavior of an empty manifest text.

  def test_empty_each_line
    assert_empty(Keep::Manifest.new("").each_line.to_a)
  end

  def test_empty_each_file_spec
    assert_empty(Keep::Manifest.new("").each_file_spec.to_a)
  end

  def test_empty_files
    assert_empty(Keep::Manifest.new("").files)
  end

  def test_empty_files_count
    assert_equal(0, Keep::Manifest.new("").files_count)
  end

  def test_empty_has_file?
    refute(Keep::Manifest.new("").has_file?(""))
  end

  # A blank line between two stream lines must not produce a stream entry.
  def test_empty_line_within_manifest
    block_s = random_block
    manifest = Keep::Manifest.
      new([". #{block_s} 0:1:file1 1:2:file2\n",
           "\n",
           ". #{block_s} 3:3:file3 6:4:file4\n"].join(""))
    streams = manifest.each_line.to_a
    assert_equal(2, streams.size)
    check_stream(streams[0], ".", [block_s], ["0:1:file1", "1:2:file2"])
    check_stream(streams[1], ".", [block_s], ["3:3:file3", "6:4:file4"])
  end

  # Octal escapes (\040 -> space, \011 -> tab) and "\\\\" (-> one backslash)
  # are decoded; a backslash that does not start an escape ("\here") is kept.
  def test_backslash_escape_parsing
    m_text = "./dir\\040name #{random_block} 0:0:file\\\\name\\011\\here.txt\n"
    manifest = Keep::Manifest.new(m_text)
    streams = manifest.each_line.to_a
    assert_equal(1, streams.size, "wrong number of streams with whitespace")
    assert_equal("./dir name", streams.first.first,
                 "wrong stream name with whitespace")
    assert_equal(["0:0:file\\name\t\\here.txt"], streams.first.last,
                 "wrong filename(s) with whitespace")
  end

  # files returns [stream, basename, size] triples.
  def test_simple_files
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert_equal([[".", "simple.txt", 9]], manifest.files)
  end

  # Each of the four streams lists file1..file3 once, each 3 bytes long.
  def test_multilevel_files
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = Hash.new { |this, key| this[key] = [] }
    manifest.files.each do |stream, basename, size|
      refute(seen[stream].include?(basename),
             "each_file repeated #{stream}/#{basename}")
      seen[stream] << basename
      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
    end
    # Guard against a vacuous pass: if files returned nothing, or dropped a
    # whole stream, neither loop in this test would fail on its own.
    assert_equal(4, seen.size, "wrong number of streams")
    seen.each_pair do |stream, basenames|
      assert_equal(%w(file1 file2 file3), basenames.sort,
                   "wrong file list for #{stream}")
    end
  end

  # Only the first two colons of a file token are separators; later colons
  # belong to the file name.
  def test_files_with_colons_in_names
    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:file:test.txt\n")
    assert_equal([[".", "file:test.txt", 9]], manifest.files)
  end

  # Octal escapes in file names are decoded (\040 -> space, \141 -> "a").
  def test_files_with_escape_sequence_in_filename
    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:a\\040\\141.txt\n")
    assert_equal([[".", "a a.txt", 9]], manifest.files)
  end

  # Segments of the same file on different manifest lines are summed
  # ("./repfile" is 4 + 1 = 5 bytes).
  def test_files_spanning_multiple_blocks
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert_equal([[".", "repfile", 5],
                  [".", "uniqfile", 4],
                  [".", "uniqfile2", 7],
                  ["./s1", "repfile", 3],
                  ["./s1", "uniqfile", 3]],
                 manifest.files.sort)
  end

  def test_minimum_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.minimum_file_count?(1), "real minimum file count false")
    refute(manifest.minimum_file_count?(2), "fake minimum file count true")
  end

  # The multiblock manifest holds 5 distinct files.
  def test_minimum_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.minimum_file_count?(2), "low minimum file count false")
    assert(manifest.minimum_file_count?(5), "real minimum file count false")
    refute(manifest.minimum_file_count?(6), "fake minimum file count true")
  end

  def test_exact_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.exact_file_count?(1), "exact file count false")
    refute(manifest.exact_file_count?(0), "-1 file count true")
    refute(manifest.exact_file_count?(2), "+1 file count true")
  end

  def test_exact_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.exact_file_count?(5), "exact file count false")
    refute(manifest.exact_file_count?(4), "-1 file count true")
    refute(manifest.exact_file_count?(6), "+1 file count true")
  end

  # has_file? accepts either a full path or a (stream, file name) pair.
  def test_has_file
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.has_file?("./repfile"), "one-arg repfile not found")
    assert(manifest.has_file?(".", "repfile"), "two-arg repfile not found")
    assert(manifest.has_file?("./s1/repfile"), "one-arg s1/repfile not found")
    assert(manifest.has_file?("./s1", "repfile"), "two-arg s1/repfile not found")
    refute(manifest.has_file?("./s1/uniqfile2"), "one-arg missing file found")
    refute(manifest.has_file?("./s1", "uniqfile2"), "two-arg missing file found")
    refute(manifest.has_file?("./s2/repfile"), "one-arg missing stream found")
    refute(manifest.has_file?("./s2", "repfile"), "two-arg missing stream found")
  end

  # has_file? matches decoded names ("a b c"), not the manifest's escaped
  # spelling ("a\040b\040c").
  def test_has_file_with_spaces
    manifest = Keep::Manifest.new(". #{random_block(3)} 0:3:a\\040b\\040c\n")
    assert(manifest.has_file?("./a b c"), "one-arg 'a b c' not found")
    assert(manifest.has_file?(".", "a b c"), "two-arg 'a b c' not found")
    refute(manifest.has_file?("a\\040b\\040c"), "one-arg unescaped found")
    refute(manifest.has_file?(".", "a\\040b\\040c"), "two-arg unescaped found")
  end
end