5104: Add Collection class to the Ruby SDK.
[arvados.git] / sdk / ruby / test / test_collection.rb
1 require "arvados/collection"
2 require "minitest/autorun"
3 require "sdk_fixtures"
4
# Minitest suite for Arv::Collection.
#
# Every test follows the same shape: build a collection from manifest
# text, apply one operation (import_manifest!, normalize!, copy!,
# rename! or remove!), then compare the collection's manifest_text with
# an expectation assembled by hand from the same fixture text.
#
# Several test_* methods take optional arguments.  They run as tests in
# their own right with the defaults, and sibling tests call them again
# with other arguments to cover variations of the same scenario.
class CollectionTest < Minitest::Test
  include SDKFixtures

  # Block locators shared by the two-stream fixture below
  # (presumably two locators for 9-byte blocks -- see SDKFixtures).
  TWO_BY_TWO_BLOCKS = SDKFixtures.random_blocks(2, 9)

  # Two manifest lines: the root stream with f1/f2, and ./s1 with f1/f3.
  TWO_BY_TWO_MANIFEST_A = [
    ". #{TWO_BY_TWO_BLOCKS.first} 0:5:f1 5:4:f2\n",
    "./s1 #{TWO_BY_TWO_BLOCKS.last} 0:5:f1 5:4:f3\n",
  ]

  # The same two lines as a single manifest string.
  TWO_BY_TWO_MANIFEST_S = TWO_BY_TWO_MANIFEST_A.join("")

  ### .new

  def test_empty_construction
    assert_collection_manifest("", Arv::Collection.new)
  end

  def test_successful_construction
    fixture_names =
      [:SIMPLEST_MANIFEST, :MULTIBLOCK_FILE_MANIFEST, :MULTILEVEL_MANIFEST]
    fixture_names.each do |fixture_name|
      text = SDKFixtures.const_get(fixture_name)
      assert_collection_manifest(
        text, Arv::Collection.new(text),
        "did not get same manifest back out from #{fixture_name}")
    end
  end

  def test_non_manifest_construction_error
    bad_texts = ["word", ". abc def", ". #{random_block} 0:", ". / !"]
    bad_texts.each do |bad_text|
      failure_note = "built collection from manifest #{bad_text.inspect}"
      assert_raises(ArgumentError, failure_note) do
        Arv::Collection.new(bad_text)
      end
    end
  end

  def test_file_directory_conflict_construction_error
    assert_raises(ArgumentError) { Arv::Collection.new(NAME_CONFLICT_MANIFEST) }
  end

  def test_no_implicit_normalization
    collection = Arv::Collection.new(NONNORMALIZED_MANIFEST)
    assert_collection_manifest(NONNORMALIZED_MANIFEST, collection)
  end

  def test_no_implicit_normalization_from_first_import
    collection = Arv::Collection.new
    collection.import_manifest!(NONNORMALIZED_MANIFEST)
    assert_collection_manifest(NONNORMALIZED_MANIFEST, collection)
  end

  ### .import_manifest!

  # Stream and file names of "." and ".." are treated as ordinary names.
  def test_non_posix_path_handling
    locator = random_block(9)
    collection = Arv::Collection.new("./.. #{locator} 0:5:.\n")
    collection.import_manifest!("./.. #{locator} 5:4:..\n")
    assert_collection_manifest("./.. #{locator} 0:5:. 5:4:..\n", collection)
  end

  def test_escaping_through_normalization
    collection = Arv::Collection.new(MANY_ESCAPES_MANIFEST)
    collection.import_manifest!(MANY_ESCAPES_MANIFEST)
    # Importing the same manifest twice should just repeat the file spec.
    # The fixture's file spec contains an unescaped backslash; the
    # Collection class is allowed to escape it properly, so the
    # expectation escapes it before doubling the spec.
    doubled_spec_text = MANY_ESCAPES_MANIFEST.sub(/ \d+:\d+:\S+/) do |spec|
      escaped_spec = spec.gsub(/([^\\])(\\[^\\\d])/, '\1\\\\\2')
      escaped_spec * 2
    end
    assert_collection_manifest(doubled_spec_text, collection)
  end

  # Imports the same file name from two different blocks and expects both
  # segments to be listed against the concatenated block list.  out_name
  # is the name expected in the output; it defaults to file_name.
  def test_concatenation_from_multiple_imports(file_name="file.txt",
                                               out_name=nil)
    expected_name = out_name || file_name
    locators = random_blocks(2, 9)
    collection = Arv::Collection.new
    locators.each do |locator|
      collection.import_manifest!(". #{locator} 1:8:#{file_name}\n")
    end
    assert_collection_manifest(
      ". #{locators.join(' ')} 1:8:#{expected_name} 10:8:#{expected_name}\n",
      collection)
  end

  def test_concatenation_from_multiple_escaped_imports
    test_concatenation_from_multiple_imports('a\040\141.txt', 'a\040a.txt')
  end

  # Imports a manifest whose two locators already appear in the
  # collection starting at over_index; no locator should be repeated.
  def test_concatenation_with_locator_overlap(over_index=0)
    locators = random_blocks(4, 2)
    all_locators = locators.join(' ')
    collection = Arv::Collection.new(". #{all_locators} 0:8:file\n")
    shared_locators = locators.slice(over_index, 2).join(' ')
    collection.import_manifest!(". #{shared_locators} 0:4:file\n")
    assert_collection_manifest(
      ". #{all_locators} 0:8:file #{over_index * 2}:4:file\n", collection)
  end

  def test_concatenation_with_middle_locator_overlap
    test_concatenation_with_locator_overlap(1)
  end

  def test_concatenation_with_end_locator_overlap
    test_concatenation_with_locator_overlap(2)
  end

  def test_concatenation_with_partial_locator_overlap
    locators = random_blocks(3, 3)
    leading_pair = locators.take(2).join(' ')
    trailing_pair = locators.slice(1, 2).join(' ')
    collection = Arv::Collection.new(". #{leading_pair} 0:6:overlap\n")
    collection.import_manifest!(". #{trailing_pair} 0:6:overlap\n")
    assert_collection_manifest(
      ". #{locators.join(' ')} 0:6:overlap 3:6:overlap\n", collection)
  end

  ### .normalize!

  def test_normalize
    locator = random_block
    collection = Arv::Collection.new(". #{locator} 0:0:f2 0:0:f1\n")
    collection.normalize!
    assert_collection_manifest(". #{locator} 0:0:f1 0:0:f2\n", collection)
  end

  ### .copy!

  def test_simple_file_copy
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.copy!("./simple.txt", "./new")
    assert_collection_manifest(
      SIMPLEST_MANIFEST.sub(" 0:9:", " 0:9:new 0:9:"), collection)
  end

  # Copies ./f2 to target and expects it to appear in ./s1 as basename,
  # with the root stream line left unchanged.
  def test_copy_file_into_other_stream(target="./s1/f2", basename="f2")
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.copy!("./f2", target)
    wanted = format("%s./s1 %s 0:5:f1 14:4:%s 5:4:f3\n",
                    TWO_BY_TWO_MANIFEST_A.first,
                    TWO_BY_TWO_BLOCKS.reverse.join(" "),
                    basename)
    assert_collection_manifest(wanted, collection)
  end

  def test_implicit_copy_file_into_other_stream
    test_copy_file_into_other_stream("./s1")
  end

  def test_copy_file_into_other_stream_with_new_name
    test_copy_file_into_other_stream("./s1/f2a", "f2a")
  end

  # Copies ./f1 to target, where a file named f1 already exists in ./s1.
  def test_copy_file_over_in_other_stream(target="./s1/f1")
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.copy!("./f1", target)
    wanted = format("%s./s1 %s 0:5:f1 14:4:f3\n",
                    TWO_BY_TWO_MANIFEST_A.first,
                    TWO_BY_TWO_BLOCKS.join(" "))
    assert_collection_manifest(wanted, collection)
  end

  def test_implicit_copy_file_over_in_other_stream
    test_copy_file_over_in_other_stream("./s1")
  end

  def test_simple_stream_copy
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.copy!("./s1", "./sNew")
    copied_line = TWO_BY_TWO_MANIFEST_A.last.sub("./s1 ", "./sNew ")
    assert_collection_manifest(TWO_BY_TWO_MANIFEST_S + copied_line, collection)
  end

  # Copies ./dir1/subdir to target and expects one extra manifest line,
  # identical to the fixture's fifth line except for its stream name.
  def test_copy_stream_into_other_stream(target="./dir2/subdir",
                                         basename="subdir")
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    collection.copy!("./dir1/subdir", target)
    source_line = MULTILEVEL_MANIFEST.lines[4]
    copied_line = source_line.sub("./dir1/subdir ", "./dir2/#{basename} ")
    assert_collection_manifest(MULTILEVEL_MANIFEST + copied_line, collection)
  end

  def test_implicit_copy_stream_into_other_stream
    test_copy_stream_into_other_stream("./dir2")
  end

  def test_copy_stream_into_other_stream_with_new_name
    test_copy_stream_into_other_stream("./dir2/newsub", "newsub")
  end

  def test_copy_stream_over_empty_stream
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    # Empty out ./dir0/subdir first so the copy lands on an empty stream.
    1.upto(3) do |file_num|
      collection.remove!("./dir0/subdir/file#{file_num}")
    end
    collection.copy!("./dir1/subdir", "./dir0")
    wanted_lines = MULTILEVEL_MANIFEST.lines
    wanted_lines[2] = wanted_lines[4].sub("./dir1/", "./dir0/")
    assert_collection_manifest(wanted_lines.join(""), collection)
  end

  def test_copy_stream_over_file_raises_ENOTDIR
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    assert_raises(Errno::ENOTDIR) { collection.copy!("./s1", "./f2") }
  end

  def test_copy_stream_over_nonempty_stream_merges_and_overwrites
    locators = random_blocks(3, 9)
    manifest_lines = [
      "./subdir #{locators[0]} 0:1:s1 1:2:zero\n",
      "./zdir #{locators[1]} 0:9:zfile\n",
      "./zdir/subdir #{locators[2]} 0:1:s2 1:2:zero\n",
    ]
    collection = Arv::Collection.new(manifest_lines.join(""))
    collection.copy!("./subdir", "./zdir")
    # Only the third line changes: it gains s1, keeps s2, and "zero" now
    # points at the copied source's data.
    manifest_lines[2] = format("./zdir/subdir %s %s 0:1:s1 9:1:s2 1:2:zero\n",
                               locators[0], locators[2])
    assert_collection_manifest(manifest_lines.join(""), collection)
  end

  # Copies source to a target underneath it.  Every fixture line whose
  # stream name starts with source is expected to appear twice: once
  # unchanged and once with that prefix replaced by target.
  def test_copy_stream_into_substream(source="./dir1",
                                      target="./dir1/subdir/dir1")
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    collection.copy!(source, target)
    source_prefix = /^#{Regexp.escape(source)}([\/ ])/
    wanted_lines = MULTILEVEL_MANIFEST.lines.flat_map do |line|
      # uniq collapses the pair when the line does not match the prefix.
      [line, line.gsub(source_prefix, "#{target}\\1")].uniq
    end
    assert_collection_manifest(wanted_lines.sort.join(""), collection)
  end

  def test_copy_root
    test_copy_stream_into_substream(".", "./root")
  end

  def test_adding_to_root_after_copy
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.copy!(".", "./root")
    collection.import_manifest!(COLON_FILENAME_MANIFEST)
    result_lines = collection.manifest_text.lines
    assert_equal(2, result_lines.size)
    assert_match(/^\. \S{33,} \S{33,} 0:9:file:test\.txt 9:9:simple\.txt\n/,
                 result_lines[0])
    assert_equal(SIMPLEST_MANIFEST.sub(". ", "./root "), result_lines[-1])
  end

  def test_copy_chaining
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.copy!("./simple.txt", "./a").copy!("./a", "./b")
    assert_collection_manifest(
      SIMPLEST_MANIFEST.sub(" 0:9:", " 0:9:a 0:9:b 0:9:"), collection)
  end

  # Builds a one-file source collection (f1) and a one-file destination
  # collection (f2) in the given streams.
  #
  # Returns [blocks, src_text, dst_text, src_collection, dst_collection].
  # The collections are built from copies of the manifest strings.
  def prep_two_collections_for_copy(src_stream, dst_stream)
    blocks = random_blocks(2, 8)
    src_text = "#{src_stream} #{blocks.first} 0:8:f1\n"
    dst_text = "#{dst_stream} #{blocks.last} 0:8:f2\n"
    collections = [src_text, dst_text].map do |text|
      Arv::Collection.new(text.dup)
    end
    [blocks, src_text, dst_text, *collections]
  end

  # Copies f1 out of a second collection and checks that the source
  # collection's manifest is left as it was.
  def test_copy_file_from_other_collection(src_stream=".", dst_stream="./s1")
    locators, src_text, _dst_text, source, destination =
      prep_two_collections_for_copy(src_stream, dst_stream)
    destination.copy!("#{src_stream}/f1", dst_stream, source)
    assert_collection_manifest(
      "#{dst_stream} #{locators.join(' ')} 0:8:f1 8:8:f2\n", destination)
    assert_collection_manifest(src_text, source)
  end

  def test_copy_file_from_other_collection_to_root
    test_copy_file_from_other_collection("./s1", ".")
  end

  def test_copy_stream_from_other_collection
    _locators, src_text, dst_text, source, destination =
      prep_two_collections_for_copy("./s2", "./s1")
    destination.copy!("./s2", "./s1", source)
    assert_collection_manifest(
      dst_text + src_text.sub("./s2 ", "./s1/s2 "), destination)
    assert_collection_manifest(src_text, source)
  end

  def test_copy_stream_from_other_collection_to_root
    _locators, src_text, dst_text, source, destination =
      prep_two_collections_for_copy("./s1", ".")
    destination.copy!("./s1", ".", source)
    assert_collection_manifest(dst_text + src_text, destination)
    assert_collection_manifest(src_text, source)
  end

  def test_copy_empty_source_path_raises_ArgumentError(src="", dst="./s1")
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    assert_raises(ArgumentError) { collection.copy!(src, dst) }
  end

  def test_copy_empty_destination_path_raises_ArgumentError
    test_copy_empty_source_path_raises_ArgumentError(".", "")
  end

  ### .rename!

  def test_simple_file_rename
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.rename!("./simple.txt", "./new")
    assert_collection_manifest(
      SIMPLEST_MANIFEST.sub(":simple.txt", ":new"), collection)
  end

  # Renames ./f2 to target and expects it to leave the root stream and
  # appear in ./s1 as basename.
  def test_rename_file_into_other_stream(target="./s1/f2", basename="f2")
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.rename!("./f2", target)
    wanted = format(". %s 0:5:f1\n./s1 %s 0:5:f1 14:4:%s 5:4:f3\n",
                    TWO_BY_TWO_BLOCKS.first,
                    TWO_BY_TWO_BLOCKS.reverse.join(" "),
                    basename)
    assert_collection_manifest(wanted, collection)
  end

  def test_implicit_rename_file_into_other_stream
    test_rename_file_into_other_stream("./s1")
  end

  def test_rename_file_into_other_stream_with_new_name
    test_rename_file_into_other_stream("./s1/f2a", "f2a")
  end

  # Renames ./f1 to target, where a file named f1 already exists in ./s1.
  def test_rename_file_over_in_other_stream(target="./s1/f1")
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.rename!("./f1", target)
    wanted = format(". %s 5:4:f2\n./s1 %s 0:5:f1 14:4:f3\n",
                    TWO_BY_TWO_BLOCKS.first,
                    TWO_BY_TWO_BLOCKS.join(" "))
    assert_collection_manifest(wanted, collection)
  end

  def test_implicit_rename_file_over_in_other_stream
    test_rename_file_over_in_other_stream("./s1")
  end

  def test_simple_stream_rename
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    collection.rename!("./s1", "./newS")
    assert_collection_manifest(
      TWO_BY_TWO_MANIFEST_S.sub("\n./s1 ", "\n./newS "), collection)
  end

  # Renames ./dir1/subdir to target and expects the fixture's fifth line
  # to move to the end of the manifest under its new stream name.
  def test_rename_stream_into_other_stream(target="./dir2/subdir",
                                           basename="subdir")
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    collection.rename!("./dir1/subdir", target)
    wanted_lines = MULTILEVEL_MANIFEST.lines
    moved_line = wanted_lines.delete_at(4)
    wanted_lines.push(moved_line.sub("./dir1/subdir ", "./dir2/#{basename} "))
    assert_collection_manifest(wanted_lines.join(""), collection)
  end

  def test_implicit_rename_stream_into_other_stream
    test_rename_stream_into_other_stream("./dir2")
  end

  def test_rename_stream_into_other_stream_with_new_name
    test_rename_stream_into_other_stream("./dir2/newsub", "newsub")
  end

  def test_rename_stream_over_empty_stream
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    # Empty out ./dir0/subdir first so the rename lands on an empty stream.
    1.upto(3) do |file_num|
      collection.remove!("./dir0/subdir/file#{file_num}")
    end
    collection.rename!("./dir1/subdir", "./dir0")
    wanted_lines = MULTILEVEL_MANIFEST.lines
    moved_line = wanted_lines.delete_at(4).sub("./dir1/", "./dir0/")
    wanted_lines[2] = moved_line
    assert_collection_manifest(wanted_lines.sort.join(""), collection)
  end

  def test_rename_stream_over_file_raises_ENOTDIR
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    assert_raises(Errno::ENOTDIR) { collection.rename!("./s1", "./f2") }
  end

  def test_rename_stream_over_nonempty_stream_raises_ENOTEMPTY
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    assert_raises(Errno::ENOTEMPTY) do
      collection.rename!("./dir1/subdir", "./dir0")
    end
  end

  # Renames source to a target underneath it and expects every stream
  # name that starts with source to have that prefix replaced by target.
  def test_rename_stream_into_substream(source="./dir1",
                                        target="./dir1/subdir/dir1")
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    collection.rename!(source, target)
    source_prefix = /^#{Regexp.escape(source)}([\/ ])/m
    assert_collection_manifest(
      MULTILEVEL_MANIFEST.gsub(source_prefix, "#{target}\\1"), collection)
  end

  def test_rename_root
    test_rename_stream_into_substream(".", "./root")
  end

  def test_adding_to_root_after_rename
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.rename!(".", "./root")
    collection.import_manifest!(SIMPLEST_MANIFEST)
    assert_collection_manifest(
      SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./root "), collection)
  end

  def test_rename_chaining
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.rename!("./simple.txt", "./x").rename!("./x", "./simple.txt")
    assert_collection_manifest(SIMPLEST_MANIFEST, collection)
  end

  ### .remove!

  def test_simple_remove
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S.dup)
    collection.remove!("./f2")
    assert_collection_manifest(
      TWO_BY_TWO_MANIFEST_S.sub(" 5:4:f2", ""), collection)
  end

  # Yields a fresh two-stream collection to the block, then asserts that
  # only the fixture line at expect_index remains in its manifest.
  def empty_stream_and_assert(expect_index=0)
    collection = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
    yield collection
    assert_collection_manifest(TWO_BY_TWO_MANIFEST_A[expect_index], collection)
  end

  def test_remove_all_files_in_substream
    empty_stream_and_assert do |collection|
      collection.remove!("./s1/f1")
      collection.remove!("./s1/f3")
    end
  end

  def test_remove_all_files_in_root_stream
    empty_stream_and_assert(1) do |collection|
      collection.remove!("./f1")
      collection.remove!("./f2")
    end
  end

  def test_remove_empty_stream
    empty_stream_and_assert do |collection|
      collection.remove!("./s1/f1")
      collection.remove!("./s1/f3")
      collection.remove!("./s1")
    end
  end

  def test_recursive_remove
    empty_stream_and_assert do |collection|
      collection.remove!("./s1", recursive: true)
    end
  end

  def test_recursive_remove_on_files
    empty_stream_and_assert do |collection|
      collection.remove!("./s1/f1", recursive: true)
      collection.remove!("./s1/f3", recursive: true)
    end
  end

  def test_chaining_removes
    empty_stream_and_assert do |collection|
      collection.remove!("./s1/f1").remove!("./s1/f3")
    end
  end

  def test_remove_last_file
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    collection.remove!("./simple.txt")
    assert_collection_manifest("", collection)
  end

  def test_remove_root_stream
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    collection.remove!(".", recursive: true)
    assert_collection_manifest("", collection)
  end

  def test_remove_nonexistent_file_raises_ENOENT(path="./NoSuchFile")
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    assert_raises(Errno::ENOENT) { collection.remove!(path) }
  end

  def test_remove_from_nonexistent_stream_raises_ENOENT
    test_remove_nonexistent_file_raises_ENOENT("./NoSuchStream/simple.txt")
  end

  def test_remove_nonempty_stream_raises_ENOTEMPTY
    collection = Arv::Collection.new(MULTILEVEL_MANIFEST)
    assert_raises(Errno::ENOTEMPTY) { collection.remove!("./dir1/subdir") }
  end

  def test_remove_empty_string_raises_ArgumentError
    collection = Arv::Collection.new(SIMPLEST_MANIFEST)
    assert_raises(ArgumentError) { collection.remove!("") }
  end

  private

  # Asserts that collection.manifest_text equals expected.  message, when
  # given, is passed through to assert_equal as the failure message.
  def assert_collection_manifest(expected, collection, message = nil)
    assert_equal(expected, collection.manifest_text, message)
  end
end