9397: Use manifest.FileSegmentForPath to get manifest segment for a file path. Cache...
author radhika <radhika@curoverse.com>
Fri, 3 Feb 2017 00:16:25 +0000 (19:16 -0500)
committer radhika <radhika@curoverse.com>
Fri, 3 Feb 2017 00:16:25 +0000 (19:16 -0500)
Cache fetched collections to avoid retrieving the same collection repeatedly. If no manifest segment is found for a mounted path, log that fact.

sdk/go/manifest/manifest.go
services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go

index 22b1c974e634cd8229b645421ecb09480807c000..f6656b4f2b8ae797e613110a27cdd938782ff941 100644 (file)
@@ -265,6 +265,19 @@ func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
        return ch
 }
 
+func (m *Manifest) FileSegmentForPath(filepath string) string {
+       dir := "."
+       file := filepath
+       if idx := strings.LastIndex(filepath, "/"); idx >= 0 {
+               dir = "./" + filepath[0:idx]
+               file = filepath[idx+1:]
+       }
+       for fs := range m.FileSegmentIterByName(filepath) {
+               return fmt.Sprintf("%v %v %v:%v:%v", dir, fs.Locator, fs.Offset, fs.Len, file)
+       }
+       return ""
+}
+
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
index fa8ee13f7578d53da306bda9f77ec19592c75c8a..876df03d2c5ca83385e68684b76807d4ec622278 100644 (file)
@@ -713,6 +713,8 @@ func (runner *ContainerRunner) CaptureOutput() error {
        return nil
 }
 
+var outputCollections = make(map[string]arvados.Collection)
+
 // Fetch the collection for the mnt.PortableDataHash
 // Return the manifest_text fragment corresponding to the specified mnt.Path
 //  after making any required updates.
@@ -730,12 +732,17 @@ func (runner *ContainerRunner) CaptureOutput() error {
 //    "path":"/subdir1/subdir2"
 //    "path":"/subdir/filename" etc
 func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, bindSuffix string) (string, error) {
-       var collection arvados.Collection
-       err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
-       if err != nil {
-               return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+       collection := outputCollections[mnt.PortableDataHash]
+       if collection.PortableDataHash == "" {
+               err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
+               if err != nil {
+                       return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+               }
+               outputCollections[mnt.PortableDataHash] = collection
        }
 
+       manifest := manifest.Manifest{Text: collection.ManifestText}
+
        manifestText := ""
        if mnt.Path == "" || mnt.Path == "/" {
                // no path specified; return the entire manifest text after making adjustments
@@ -770,30 +777,21 @@ func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, b
                                break
                        } else {
                                // look for a matching file in this stream
-                               if tokens[0] == pathSubdir {
-                                       // path refers to a file in this stream
-                                       for _, token := range tokens {
-                                               if strings.Index(token, ":"+pathFileName) > 0 {
-                                                       // found the file in the stream; discard all other file tokens
-                                                       for _, t := range tokens {
-                                                               if strings.Index(t, ":") == -1 {
-                                                                       manifestText += (" " + t)
-                                                               } else {
-                                                                       break // done reading all non-file tokens of this stream
-                                                               }
-                                                       }
-                                                       manifestText = strings.Trim(manifestText, " ")
-                                                       token = strings.Replace(token, ":"+pathFileName, ":"+bindFileName, -1)
-                                                       manifestText += (" " + token + "\n")
-                                                       manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
-                                                       break
-                                               }
-                                       }
+                               fs := manifest.FileSegmentForPath(mntPath[1:])
+                               if fs != "" {
+                                       manifestText = strings.Replace(fs, ":"+pathFileName, ":"+bindFileName, -1)
+                                       manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
+                                       manifestText += "\n"
+                                       break
                                }
                        }
                }
        }
 
+       if manifestText == "" {
+               runner.CrunchLog.Printf("No manifest segment found for bind '%v' with path '%v'", bindSuffix, mnt.Path)
+       }
+
        return manifestText, nil
 }
 
index 51549aeac8bafc00a4764e17cbfc71c72c9af263..8d0322c02ca075ae643458ab647469a1ec751403 100644 (file)
@@ -57,8 +57,8 @@ var hwImageId = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea
 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d@5693216f 0:46:md5sum.txt\n"
 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
 
-var subdirManifest = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:19:file2_in_subdir2.txt\n"
-var subdirPDH = "a0def87f80dd594d4675809e83bd4f15+367"
+var normalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt\n"
+var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
 
 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
@@ -185,8 +185,8 @@ func (client *ArvTestClient) Get(resourceType string, uuid string, parameters ar
                        output.(*arvados.Collection).ManifestText = hwManifest
                } else if uuid == otherPDH {
                        output.(*arvados.Collection).ManifestText = otherManifest
-               } else if uuid == subdirPDH {
-                       output.(*arvados.Collection).ManifestText = subdirManifest
+               } else if uuid == normalizedWithSubdirsPDH {
+                       output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
                }
        }
        if resourceType == "containers" {
@@ -1194,7 +1194,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
 
-                               origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+                               origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 9:18:bar"))
@@ -1203,7 +1203,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:sub1file2"))
 
-                               c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:19:sub2file2"))
+                               c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2"))
                        }
                }
        }
@@ -1252,7 +1252,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
 
-                               origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+                               origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890@569fa8c3 9:18:bar"))
@@ -1261,7 +1261,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
 
                                c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234@569fa8c4 9:18:sub1file2"))
 
-                               c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:19:sub2file2"))
+                               c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211@569fa8c5 9:18:sub2file2"))
                        }
                }
        }