14327: Add comments
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 18 Dec 2018 19:08:58 +0000 (14:08 -0500)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Tue, 18 Dec 2018 19:08:58 +0000 (14:08 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/tests/test_pathmapper.py

index c844311d55d0c9c4db853c8d3036bf4d10a0f9b9..0b2a22788e6f98537b0f5a3437a2d540a57d47ee 100644 (file)
@@ -120,6 +120,14 @@ class ArvPathMapper(PathMapper):
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
     def needs_new_collection(self, srcobj, prefix=""):
+        """Check if files need to be staged into a new collection.
+
+        If all the files are in the same collection and in the same
+        paths they would be staged to, return False.  Otherwise, a new
+        collection is needed with files copied/created in the
+        appropriate places.
+        """
+
         loc = srcobj["location"]
         if loc.startswith("_:"):
             return True
@@ -134,10 +142,9 @@ class ArvPathMapper(PathMapper):
                 prefix = loc+"/"
         if srcobj["class"] == "File" and loc not in self._pathmap:
             return True
-        if srcobj.get("secondaryFiles"):
-            for s in srcobj["secondaryFiles"]:
-                if self.needs_new_collection(s, prefix):
-                    return True
+        for s in srcobj.get("secondaryFiles", []):
+            if self.needs_new_collection(s, prefix):
+                return True
         if srcobj.get("listing"):
             prefix = "%s%s/" % (prefix, srcobj["basename"])
             for l in srcobj["listing"]:
@@ -195,6 +202,10 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
+                # If all secondary files/directories are located in
+                # the same collection as the primary file and the
+                # paths and names that are consistent with staging,
+                # don't create a new collection.
                 if not self.needs_new_collection(srcobj):
                     continue
 
index fe8c2536c0ec39643a6b2d5142f2b74306266af8..b78e89012ad62c5f952476da0553b2d26dac5fd3 100644 (file)
@@ -105,6 +105,8 @@ class TestPathmap(unittest.TestCase):
 
     def test_needs_new_collection(self):
         arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+
+        # Plain file.  Don't need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -114,9 +116,12 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # A file that isn't in the pathmap (for some reason).  Need a new collection.
         p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
         self.assertTrue(p.needs_new_collection(a))
 
+        # A file with a secondary file in the same collection.  Don't need
+        # a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -132,6 +137,8 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # Secondary file is in a different collection from the
+        # a new collectionprimary.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -147,6 +154,8 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file should be staged to a different name than
+        # path in location.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -162,6 +171,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file is a directory.  Do not need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -183,6 +193,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
         self.assertFalse(p.needs_new_collection(a))
 
+        # Secondary file is a renamed directory.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",
@@ -204,6 +215,7 @@ class TestPathmap(unittest.TestCase):
         p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
         self.assertTrue(p.needs_new_collection(a))
 
+        # Secondary file is a file literal.  Need a new collection.
         a = {
             "class": "File",
             "location": "keep:99999999999999999999999999999991+99/hw.py",