# Commit: Add 'apps/arv-web/' from commit 'f9732ad8460d013c2f28363655d0d1b91894dca5'
# File: [arvados.git] / sdk / ruby / test / test_keep_manifest.rb
1 require "minitest/autorun"
2 require "arvados/keep"
3
# Build a fake block token of the form "<32 hex digits>+<size>" for use in
# manifest text fixtures.
#
# The hash portion is 32 zero-padded lowercase hex digits taken from
# Kernel#rand; it is not a digest of any real data.
#
# @param size [Integer, nil] size to embed after the "+"; when nil, a random
#   size in 0...(64 * 1024 * 1024) is used instead. Note that 0 is truthy in
#   Ruby, so an explicit size of 0 is honored.
# @return [String] the generated token, e.g. "00ab...ff+9"
def random_block(size = nil)
  format("%032x+%d", rand(16**32), size || rand(64 * 1024 * 1024))
end
7
# Unit tests for Keep::Manifest: stream iteration through each_line, the
# per-file listing returned by files, and the file-count / has_file?
# predicates.
class ManifestTest < Minitest::Test
  # One stream ("."), one block, one nine-byte file.
  SIMPLEST_MANIFEST = ". #{random_block(9)} 0:9:simple.txt\n"

  # Three stream lines. The "." stream appears twice, and "repfile" in "."
  # is listed as two segments (4 bytes + 1 byte) across those lines.
  MULTIBLOCK_FILE_MANIFEST =
    [". #{random_block(8)} 0:4:repfile 4:4:uniqfile",
     "./s1 #{random_block(6)} 0:3:repfile 3:3:uniqfile",
     ". #{random_block(8)} 0:7:uniqfile2 7:1:repfile\n"].join("\n")

  # Four distinct streams, each holding the same three three-byte files.
  MULTILEVEL_MANIFEST =
    [". #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir1/subdir #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n",
     "./dir2 #{random_block(9)} 0:3:file1 3:3:file2 6:3:file3\n"].join("")

  # Assert that one parsed stream matches the expected pieces.
  #
  # stream     - the value yielded for one manifest line; its first element
  #              is compared to exp_name, its second element (mapped through
  #              to_s) to exp_blocks, and its last element to exp_files.
  # exp_name   - expected stream name.
  # exp_blocks - expected block tokens, as strings.
  # exp_files  - expected file tokens.
  def check_stream(stream, exp_name, exp_blocks, exp_files)
    assert_equal(exp_name, stream.first)
    assert_equal(exp_blocks, stream[1].map(&:to_s))
    assert_equal(exp_files, stream.last)
  end

  # each_line without a block returns something enumerable into one stream.
  def test_simple_each_line_array
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    stream_name, block_s, file = SIMPLEST_MANIFEST.strip.split
    stream_a = manifest.each_line.to_a
    assert_equal(1, stream_a.size, "wrong number of streams")
    check_stream(stream_a.first, stream_name, [block_s], [file])
  end

  # each_line with a block yields (stream, blocks, files) once per line.
  def test_simple_each_line_block
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    result = []
    manifest.each_line do |_stream, _blocks, files|
      result << files
    end
    assert_equal([[SIMPLEST_MANIFEST.split.last]], result,
                 "wrong result from each_line block")
  end

  # Every stream of the multilevel fixture is yielded exactly once.
  def test_multilevel_each_line
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = []
    manifest.each_line do |stream, _blocks, files|
      refute(seen.include?(stream),
             "each_line already yielded stream #{stream}")
      seen << stream
      assert_equal(3, files.size, "wrong file count for stream #{stream}")
    end
    assert_equal(4, seen.size, "wrong number of streams")
  end

  # An empty manifest produces no streams.
  def test_empty_each_line
    assert_empty(Keep::Manifest.new("").each_line.to_a)
  end

  # A blank line between two stream lines must not produce a stream itself.
  def test_empty_line_within_manifest
    block_s = random_block
    manifest = Keep::Manifest.
      new([". #{block_s} 0:1:file1 1:2:file2\n",
           "\n",
           ". #{block_s} 3:3:file3 6:4:file4\n"].join(""))
    streams = manifest.each_line.to_a
    assert_equal(2, streams.size)
    check_stream(streams[0], ".", [block_s], ["0:1:file1", "1:2:file2"])
    check_stream(streams[1], ".", [block_s], ["3:3:file3", "6:4:file4"])
  end

  # Octal escapes (\040 -> space, \011 -> tab) and a doubled backslash are
  # decoded; the lone backslash before "here" is expected to pass through.
  def test_backslash_escape_parsing
    m_text = "./dir\\040name #{random_block} 0:0:file\\\\name\\011\\here.txt\n"
    manifest = Keep::Manifest.new(m_text)
    streams = manifest.each_line.to_a
    assert_equal(1, streams.size, "wrong number of streams with whitespace")
    assert_equal("./dir name", streams.first.first,
                 "wrong stream name with whitespace")
    assert_equal(["0:0:file\\name\t\\here.txt"], streams.first.last,
                 "wrong filename(s) with whitespace")
  end

  # files returns [stream, basename, size] triples.
  def test_simple_files
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert_equal([[".", "simple.txt", 9]], manifest.files)
  end

  # No (stream, basename) pair is repeated, and each stream lists all files.
  def test_multilevel_files
    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST)
    seen = Hash.new { |hash, key| hash[key] = [] }
    manifest.files.each do |stream, basename, size|
      refute(seen[stream].include?(basename),
             "each_file repeated #{stream}/#{basename}")
      seen[stream] << basename
      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
    end
    seen.each_pair do |stream, basenames|
      assert_equal(%w(file1 file2 file3), basenames.sort,
                   "wrong file list for #{stream}")
    end
  end

  # Only the first two colons delimit a file token; the rest is the name.
  def test_files_with_colons_in_names
    manifest = Keep::Manifest.new(". #{random_block(9)} 0:9:file:test.txt\n")
    assert_equal([[".", "file:test.txt", 9]], manifest.files)
  end

  # Segments of the same file on different lines are summed into one entry
  # ("repfile" in "." is 4 + 1 = 5 bytes).
  def test_files_spanning_multiple_blocks
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert_equal([[".", "repfile", 5],
                  [".", "uniqfile", 4],
                  [".", "uniqfile2", 7],
                  ["./s1", "repfile", 3],
                  ["./s1", "uniqfile", 3]],
                 manifest.files.sort)
  end

  def test_minimum_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.minimum_file_count?(1), "real minimum file count false")
    refute(manifest.minimum_file_count?(2), "fake minimum file count true")
  end

  # The multiblock fixture holds five distinct files.
  def test_minimum_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.minimum_file_count?(2), "low minimum file count false")
    assert(manifest.minimum_file_count?(5), "real minimum file count false")
    refute(manifest.minimum_file_count?(6), "fake minimum file count true")
  end

  def test_exact_file_count_simple
    manifest = Keep::Manifest.new(SIMPLEST_MANIFEST)
    assert(manifest.exact_file_count?(1), "exact file count false")
    refute(manifest.exact_file_count?(0), "-1 file count true")
    refute(manifest.exact_file_count?(2), "+1 file count true")
  end

  def test_exact_file_count_multiblock
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.exact_file_count?(5), "exact file count false")
    refute(manifest.exact_file_count?(4), "-1 file count true")
    refute(manifest.exact_file_count?(6), "+1 file count true")
  end

  # has_file? accepts either a full path or a (stream, basename) pair.
  def test_has_file
    manifest = Keep::Manifest.new(MULTIBLOCK_FILE_MANIFEST)
    assert(manifest.has_file?("./repfile"), "one-arg repfile not found")
    assert(manifest.has_file?(".", "repfile"), "two-arg repfile not found")
    assert(manifest.has_file?("./s1/repfile"), "one-arg s1/repfile not found")
    assert(manifest.has_file?("./s1", "repfile"), "two-arg s1/repfile not found")
    refute(manifest.has_file?("./s1/uniqfile2"), "one-arg missing file found")
    refute(manifest.has_file?("./s1", "uniqfile2"), "two-arg missing file found")
    refute(manifest.has_file?("./s2/repfile"), "one-arg missing stream found")
    refute(manifest.has_file?("./s2", "repfile"), "two-arg missing stream found")
  end
end