1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: Apache-2.0
18 "git.arvados.org/arvados.git/sdk/go/arvadostest"
19 "git.arvados.org/arvados.git/sdk/go/blockdigest"
23 func TestGocheck(t *testing.T) { TestingT(t) }
25 var _ = Suite(&suite{})
// getStackTrace returns the calling goroutine's stack trace, prefixed
// with "Stack Trace:\n", for inclusion in test failure messages.
//
// runtime.Stack silently truncates its output to the size of the buffer
// it is given, so the buffer is doubled until the whole trace fits
// instead of cutting it off at a fixed 1000 bytes.
func getStackTrace() string {
	buf := make([]byte, 1000)
	for {
		n := runtime.Stack(buf, false)
		if n < len(buf) {
			return "Stack Trace:\n" + string(buf[:n])
		}
		// A completely filled buffer means the trace may have been
		// truncated; retry with more room.
		buf = make([]byte, 2*len(buf))
	}
}
35 func expectFromChannel(t *testing.T, c <-chan string, expected string) {
38 t.Fatalf("Expected to receive %s but channel was closed. %s",
42 if actual != expected {
43 t.Fatalf("Expected %s but got %s instead. %s",
50 func expectChannelClosed(t *testing.T, c <-chan interface{}) {
53 t.Fatalf("Expected channel to be closed, but received %v instead. %s",
59 func expectEqual(t *testing.T, actual interface{}, expected interface{}) {
60 if actual != expected {
61 t.Fatalf("Expected %v but received %v instead. %s",
68 func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
69 if len(actual) != len(expected) {
70 t.Fatalf("Expected %v (length %d), but received %v (length %d) instead. %s", expected, len(expected), actual, len(actual), getStackTrace())
72 for i := range actual {
73 if actual[i] != expected[i] {
74 t.Fatalf("Expected %v but received %v instead (first disagreement at position %d). %s", expected, actual, i, getStackTrace())
79 func expectFileStreamSegmentsEqual(t *testing.T, actual []FileStreamSegment, expected []FileStreamSegment) {
80 if !reflect.DeepEqual(actual, expected) {
81 t.Fatalf("Expected %v but received %v instead. %s", expected, actual, getStackTrace())
85 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
86 expectEqual(t, actual.StreamName, expected.StreamName)
87 expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
88 expectFileStreamSegmentsEqual(t, actual.FileStreamSegments, expected.FileStreamSegments)
91 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
92 expectEqual(t, actual.Digest, expected.Digest)
93 expectEqual(t, actual.Size, expected.Size)
94 expectStringSlicesEqual(t, actual.Hints, expected.Hints)
97 func TestParseManifestStreamSimple(t *testing.T) {
98 m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
99 expectManifestStream(t, m, ManifestStream{StreamName: ".",
100 Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
101 FileStreamSegments: []FileStreamSegment{{0, 2310, "qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}}})
104 func TestParseBlockLocatorSimple(t *testing.T) {
105 b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
107 t.Fatalf("Unexpected error parsing block locator: %v", err)
109 d, err := blockdigest.FromString("365f83f5f808896ec834c8b595288735")
111 t.Fatalf("Unexpected error during FromString for block locator: %v", err)
113 expectBlockLocator(t, blockdigest.BlockLocator{b.Digest, b.Size, b.Hints},
114 blockdigest.BlockLocator{Digest: d,
116 Hints: []string{"K@qr1hi",
117 "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
120 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
121 content, err := ioutil.ReadFile("testdata/short_manifest")
123 t.Fatalf("Unexpected error reading manifest from file: %v", err)
125 manifest := Manifest{Text: string(content)}
126 streamIter := manifest.StreamIter()
128 firstStream := <-streamIter
129 expectManifestStream(t,
131 ManifestStream{StreamName: ".",
132 Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
133 FileStreamSegments: []FileStreamSegment{{0, 15693477, "chr10_band0_s0_e3000000.fj"}}})
135 received, ok := <-streamIter
137 t.Fatalf("Expected streamIter to be closed, but received %v instead.",
// TestBlockIterLongManifest loads testdata/long_manifest, iterates its
// blocks with BlockIterWithDuplicates, and checks the first block, the
// last block, and that blocksRead equals 853.
// NOTE(review): the declaration and increment of blocksRead and the
// expected Size values are not visible in this excerpt — confirm against
// the full file.
142 func TestBlockIterLongManifest(t *testing.T) {
143 content, err := ioutil.ReadFile("testdata/long_manifest")
145 t.Fatalf("Unexpected error reading manifest from file: %v", err)
147 manifest := Manifest{Text: string(content)}
148 blockChannel := manifest.BlockIterWithDuplicates()
// The first block is received separately so it can be checked on its own.
150 firstBlock := <-blockChannel
151 d, err := blockdigest.FromString("b746e3d2104645f2f64cd3cc69dd895d")
153 t.Fatalf("Unexpected error during FromString for block: %v", err)
155 expectBlockLocator(t,
157 blockdigest.BlockLocator{Digest: d,
159 Hints: []string{"E2866e643690156651c03d876e638e674dcd79475@5441920c"}})
// Drain the channel; when the loop ends lastBlock holds the final block.
161 var lastBlock blockdigest.BlockLocator
162 for lastBlock = range blockChannel {
165 expectEqual(t, blocksRead, 853)
167 d, err = blockdigest.FromString("f9ce82f59e5908d2d70e18df9679b469")
169 t.Fatalf("Unexpected error during FromString for block: %v", err)
171 expectBlockLocator(t,
173 blockdigest.BlockLocator{Digest: d,
175 Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
// TestUnescape runs UnescapeName over a table of string slices and
// compares each result with the expected value in element 1. Mismatches
// are reported with t.Errorf, so the remaining cases still run.
// NOTE(review): the table rows and the assignment of `in` are not visible
// in this excerpt; `in` is presumably element 0 of each row — confirm.
178 func TestUnescape(t *testing.T) {
179 for _, testCase := range [][]string{
186 expect := testCase[1]
187 got := UnescapeName(in)
189 t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
194 type fsegtest struct {
195 mt string // manifest text
197 want []FileSegment // segments should be received on channel
200 func TestFileSegmentIterByName(t *testing.T) {
201 mt := arvadostest.PathologicalManifest
202 for _, testCase := range []fsegtest{
203 {mt: mt, f: "zzzz", want: nil},
204 // This case is too sensitive: it would be acceptable
205 // (even preferable) to return only one empty segment.
206 {mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
207 {mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
208 {mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
209 {mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
210 {mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
211 {mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
212 {mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
213 {mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
214 {mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
215 {mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
216 {mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
217 // This case is too sensitive: it would be better to
218 // omit the empty segment.
219 {mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
220 {mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
222 m := Manifest{Text: testCase.mt}
223 var got []FileSegment
224 for fs := range m.FileSegmentIterByName(testCase.f) {
225 got = append(got, *fs)
227 if !reflect.DeepEqual(got, testCase.want) {
228 t.Errorf("For %#v:\n got %#v\n want %#v", testCase.f, got, testCase.want)
// TestBlockIterWithBadManifest feeds malformed manifest texts to
// BlockIterWithDuplicates, drains the resulting channel, and then checks
// that manifest.Err is set and that its message matches the expected
// pattern. Each table row is {manifest text, pattern}; the pattern is
// passed to regexp.MatchString, so it is a regular expression rather
// than a literal string.
233 func TestBlockIterWithBadManifest(t *testing.T) {
234 testCases := [][]string{
235 {"badstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt", "Invalid stream name: badstream"},
236 {"/badstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt", "Invalid stream name: /badstream"},
237 {". acbd18db4cc2f85cedef654fccc4a4d8+3 file1.txt", "Invalid file token: file1.txt"},
238 {". acbd18db4cc2f85cedef654fccc4a4+3 0:1:file1.txt", "No block locators found"},
239 {". acbd18db4cc2f85cedef654fccc4a4d8 0:1:file1.txt", "No block locators found"},
240 {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt file2.txt 1:2:file3.txt", "Invalid file token: file2.txt"},
241 {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt. bcde18db4cc2f85cedef654fccc4a4d8+3 1:2:file3.txt", "Invalid file token: bcde18db4cc2f85cedef654fccc4a4d8.*"},
242 {". acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt\n. acbd18db4cc2f85cedef654fccc4a4d8+3 ::file2.txt\n", "Invalid file token: ::file2.txt"},
243 {". acbd18db4cc2f85cedef654fccc4a4d8+3 bcde18db4cc2f85cedef654fccc4a4d8+3\n", "No file tokens found"},
244 {". acbd18db4cc2f85cedef654fccc4a4d8+3 ", "Invalid file token"},
245 {". acbd18db4cc2f85cedef654fccc4a4d8+3", "No file tokens found"},
246 {". 0:1:file1.txt\n", "No block locators found"},
247 {".\n", "No block locators found"},
250 for _, testCase := range testCases {
251 manifest := Manifest{Text: string(testCase[0])}
252 blockChannel := manifest.BlockIterWithDuplicates()
// The channel is drained before manifest.Err is inspected.
254 for block := range blockChannel {
258 // completed reading from blockChannel; now check for errors
259 if manifest.Err == nil {
260 t.Fatalf("Expected error")
// The error returned by MatchString (an invalid pattern) is discarded.
263 matched, _ := regexp.MatchString(testCase[1], manifest.Err.Error())
265 t.Fatalf("Expected error not found. Expected: %v; Found: %v", testCase[1], manifest.Err.Error())
// TestNormalizeManifest exercises Manifest.Extract with a range of
// source/target path pairs and compares the Text of the result with an
// expected manifest. The inputs include repeated file names across
// streams, files addressed through a stream path, renames into new
// streams, escaped names (\040), interleaved segments, and
// arvadostest.PathologicalManifest. The final cases (m13, m14, m15)
// expect an empty Text and check the Err message for an invalid stream
// name, a file segment extending past the end of the stream, and an
// invalid file token respectively.
// NOTE(review): the closing lines of the raw string literals are not
// visible in this excerpt, so the exact expected texts (including
// trailing newlines) must be confirmed against the full file.
270 func TestNormalizeManifest(t *testing.T) {
271 m1 := Manifest{Text: `. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
272 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
273 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
275 expectEqual(t, m1.Extract(".", ".").Text,
276 `. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 0:127:md5sum.txt
279 m2 := Manifest{Text: `. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
281 expectEqual(t, m2.Extract(".", ".").Text, m2.Text)
283 m3 := Manifest{Text: `. 5348b82a029fd9e971a811ce1f71360b+43 3:40:md5sum.txt
284 . 085c37f02916da1cad16f93c54d899b7+41 0:41:md5sum.txt
285 . 8b22da26f9f433dea0a10e5ec66d73ba+43 0:43:md5sum.txt
287 expectEqual(t, m3.Extract(".", ".").Text, `. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:md5sum.txt
289 expectEqual(t, m3.Extract("/md5sum.txt", "/wiggle.txt").Text, `. 5348b82a029fd9e971a811ce1f71360b+43 085c37f02916da1cad16f93c54d899b7+41 8b22da26f9f433dea0a10e5ec66d73ba+43 3:124:wiggle.txt
292 m4 := Manifest{Text: `. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
293 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
294 ./foo 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
297 expectEqual(t, m4.Extract(".", ".").Text,
298 `./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
299 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
302 expectEqual(t, m4.Extract("./foo", ".").Text, ". 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar\n")
303 expectEqual(t, m4.Extract("./foo", "./baz").Text, "./baz 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar\n")
304 expectEqual(t, m4.Extract("./foo/bar", ".").Text, ". 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar\n")
305 expectEqual(t, m4.Extract("./foo/bar", "./baz").Text, ". 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:baz 67108864:3:baz\n")
306 expectEqual(t, m4.Extract("./foo/bar", "./quux/").Text, "./quux 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar\n")
307 expectEqual(t, m4.Extract("./foo/bar", "./quux/baz").Text, "./quux 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:baz 67108864:3:baz\n")
308 expectEqual(t, m4.Extract(".", ".").Text, `./foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
309 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
311 expectEqual(t, m4.Extract(".", "./zip").Text, `./zip/foo 204e43b8a1185621ca55a94839582e6f+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar 67108864:3:bar
312 ./zip/zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
315 expectEqual(t, m4.Extract("foo/.//bar/../../zzz/", "/waz/").Text, `./waz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
318 m5 := Manifest{Text: `. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:foo/bar
319 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
320 ./foo 204e43b8a1185621ca55a94839582e6f+67108864 3:3:bar
322 expectEqual(t, m5.Extract(".", ".").Text,
323 `./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:6:bar
324 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
327 m8 := Manifest{Text: `./a\040b\040c 59ca0efa9f5633cb0371bbc0355478d8+13 0:13:hello\040world.txt
329 expectEqual(t, m8.Extract(".", ".").Text, m8.Text)
331 m9 := Manifest{Text: ". acbd18db4cc2f85cedef654fccc4a4d8+40 0:10:one 20:10:two 10:10:one 30:10:two\n"}
332 expectEqual(t, m9.Extract("", "").Text, ". acbd18db4cc2f85cedef654fccc4a4d8+40 0:20:one 20:20:two\n")
334 m10 := Manifest{Text: ". acbd18db4cc2f85cedef654fccc4a4d8+40 0:10:one 20:10:two 10:10:one 30:10:two\n"}
335 expectEqual(t, m10.Extract("./two", "./three").Text, ". acbd18db4cc2f85cedef654fccc4a4d8+40 20:20:three\n")
337 m11 := Manifest{Text: arvadostest.PathologicalManifest}
338 expectEqual(t, m11.Extract(".", ".").Text, `. acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy 0:1:f 1:4:ooba 5:1:r 5:4:rbaz 0:0:zero@0 0:0:zero@1 0:0:zero@4 0:0:zero@9
339 ./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo 0:3:foo 0:0:zero
340 ./foo\040bar acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz 0:3:baz\040waz
341 ./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 2:1:o 2:1:ofoo 0:3:ofoo 1:2:oo
342 ./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 3:1:frob 1:2:oof 0:1:oof
345 m12 := Manifest{Text: `./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
346 ./zzz 204e43b8a1185621ca55a94839582e6f+67108864 0:999:zzz
347 ./foo/baz 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
350 expectEqual(t, m12.Extract("./foo", ".").Text, `. 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
351 ./baz 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
353 expectEqual(t, m12.Extract("./foo", "./blub").Text, `./blub 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
354 ./blub/baz 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
356 expectEqual(t, m12.Extract("./foo", "./blub/").Text, `./blub 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
357 ./blub/baz 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
359 expectEqual(t, m12.Extract("./foo/", "./blub/").Text, `./blub 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
360 ./blub/baz 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:3:bar
363 m13 := Manifest{Text: `foo 204e43b8a1185621ca55a94839582e6f+67108864 0:3:bar
366 expectEqual(t, m13.Extract(".", ".").Text, ``)
367 expectEqual(t, m13.Extract(".", ".").Err.Error(), "Invalid stream name: foo")
369 m14 := Manifest{Text: `./foo 204e43b8a1185621ca55a94839582e6f+67108864 67108863:3:bar
372 expectEqual(t, m14.Extract(".", ".").Text, ``)
373 expectEqual(t, m14.Extract(".", ".").Err.Error(), "File segment 67108863:3:bar extends past end of stream 67108864")
375 m15 := Manifest{Text: `./foo 204e43b8a1185621ca55a94839582e6f+67108864 0:3bar
378 expectEqual(t, m15.Extract(".", ".").Text, ``)
379 expectEqual(t, m15.Extract(".", ".").Err.Error(), "Invalid file token: 0:3bar")
382 func (s *suite) TestExtractFromLargeManifest(c *C) {
383 m := Manifest{Text: s.generateManifest(c, 200, 200, 2, 4<<20)}
384 fmt.Println(m.Extract("./dir77/file88", "./extracted").Text)
// generateManifest builds synthetic manifest text in an in-memory buffer.
// It writes one stream per directory ("./dir<i>"), each followed by
// filesPerDir*blocksPerFile block locators and then the file segments,
// and ends each stream with a newline.
//
// When interleaveChunk is 0 each file gets a single segment; otherwise
// each file is written as a series of segments of at most interleaveChunk
// bytes at random offsets within the stream.
// NOTE(review): the declaration/increment of blkid, the clamping of size,
// and the return statement are not visible in this excerpt; the function
// presumably returns the buffer contents as a string — confirm.
386 func (s *suite) generateManifest(c *C, dirCount, filesPerDir, blocksPerFile, interleaveChunk int) string {
387 c.Logf("%s building manifest with dirCount=%d filesPerDir=%d blocksPerFile=%d", time.Now(), dirCount, filesPerDir, blocksPerFile)
// Every generated block locator advertises a size of 64 MiB (1 << 26).
388 const blksize = 1 << 26
// Pre-allocate 40000000 bytes of capacity for the manifest text.
389 mb := bytes.NewBuffer(make([]byte, 0, 40000000))
391 for i := 0; i < dirCount; i++ {
392 fmt.Fprintf(mb, "./dir%d", i)
// Block locators: hash, "+A" hint and "@" suffix are all formatted from blkid.
393 for j := 0; j < filesPerDir; j++ {
394 for k := 0; k < blocksPerFile; k++ {
396 fmt.Fprintf(mb, " %032x+%d+A%040x@%08x", blkid, blksize, blkid, blkid)
// File segments. Both the dir and file parts of the name are formatted from j.
399 for j := 0; j < filesPerDir; j++ {
400 if interleaveChunk == 0 {
// One whole-file segment; the offset decreases as j grows, so files are
// laid out in reverse order within the stream.
401 fmt.Fprintf(mb, " %d:%d:dir%d/file%d", (filesPerDir-j-1)*blocksPerFile*blksize, blocksPerFile*blksize, j, j)
// Interleaved mode: emit chunks until the file's full length has been written.
404 for todo := int64(blocksPerFile) * int64(blksize); todo > 0; todo -= int64(interleaveChunk) {
405 size := int64(interleaveChunk)
// The upper bound is the total stream length minus size, so the segment
// always ends within the stream.
409 offset := rand.Int63n(int64(blocksPerFile)*int64(blksize)*int64(filesPerDir) - size)
410 fmt.Fprintf(mb, " %d:%d:dir%d/file%d", offset, size, j, j)
413 mb.Write([]byte{'\n'})
418 func TestFirstBlock(t *testing.T) {
420 expectEqual(t, firstBlock([]uint64{1, 2, 3, 4}, 3), 2)
421 expectEqual(t, firstBlock([]uint64{1, 2, 3, 4, 5, 6}, 4), 3)