14360: Merge branch 'master' into 14360-dispatch-cloud
authorTom Clegg <tclegg@veritasgenetics.com>
Thu, 20 Dec 2018 18:58:26 +0000 (13:58 -0500)
committerTom Clegg <tclegg@veritasgenetics.com>
Thu, 20 Dec 2018 18:58:26 +0000 (13:58 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

apps/workbench/Gemfile.lock
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/test_with_arvbox.sh
sdk/cwl/tests/test_pathmapper.py
services/api/Gemfile.lock

index e06e416bbd8294f6a65febec3409bffb12b07b8d..cc45ca66f2cb9bb9f09efa4269f7533fe572bdb0 100644 (file)
@@ -9,38 +9,38 @@ GEM
   remote: https://rubygems.org/
   specs:
     RedCloth (4.3.2)
-    actionmailer (4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
+    actionmailer (4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 1.0, >= 1.0.5)
-    actionpack (4.2.10)
-      actionview (= 4.2.10)
-      activesupport (= 4.2.10)
+    actionpack (4.2.11)
+      actionview (= 4.2.11)
+      activesupport (= 4.2.11)
       rack (~> 1.6)
       rack-test (~> 0.6.2)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (4.2.10)
-      activesupport (= 4.2.10)
+    actionview (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
       erubis (~> 2.7.0)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (4.2.10)
-      activesupport (= 4.2.10)
+    activejob (4.2.11)
+      activesupport (= 4.2.11)
       globalid (>= 0.3.0)
-    activemodel (4.2.10)
-      activesupport (= 4.2.10)
+    activemodel (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
-    activerecord (4.2.10)
-      activemodel (= 4.2.10)
-      activesupport (= 4.2.10)
+    activerecord (4.2.11)
+      activemodel (= 4.2.11)
+      activesupport (= 4.2.11)
       arel (~> 6.0)
     activerecord-nulldb-adapter (0.3.8)
       activerecord (>= 2.0.0)
-    activesupport (4.2.10)
+    activesupport (4.2.11)
       i18n (~> 0.7)
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
@@ -92,7 +92,7 @@ GEM
       execjs
     coffee-script-source (1.12.2)
     commonjs (0.2.7)
-    concurrent-ruby (1.0.5)
+    concurrent-ruby (1.1.4)
     crass (1.0.4)
     deep_merge (1.2.1)
     docile (1.1.5)
@@ -159,15 +159,15 @@ GEM
     loofah (2.2.3)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
-    mail (2.7.0)
+    mail (2.7.1)
       mini_mime (>= 0.1.1)
     memoist (0.16.0)
     metaclass (0.0.4)
     mime-types (3.1)
       mime-types-data (~> 3.2015)
     mime-types-data (3.2016.0521)
-    mini_mime (1.0.0)
-    mini_portile2 (2.3.0)
+    mini_mime (1.0.1)
+    mini_portile2 (2.4.0)
     minitest (5.10.3)
     mocha (1.3.0)
       metaclass (~> 0.0.1)
@@ -182,8 +182,8 @@ GEM
     net-ssh (4.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nokogiri (1.8.5)
-      mini_portile2 (~> 2.3.0)
+    nokogiri (1.9.1)
+      mini_portile2 (~> 2.4.0)
     npm-rails (0.2.1)
       rails (>= 3.2)
     oj (3.6.4)
@@ -206,16 +206,16 @@ GEM
       rack (>= 1.2.0)
     rack-test (0.6.3)
       rack (>= 1.0)
-    rails (4.2.10)
-      actionmailer (= 4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
-      activemodel (= 4.2.10)
-      activerecord (= 4.2.10)
-      activesupport (= 4.2.10)
+    rails (4.2.11)
+      actionmailer (= 4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
+      activemodel (= 4.2.11)
+      activerecord (= 4.2.11)
+      activesupport (= 4.2.11)
       bundler (>= 1.3.0, < 2.0)
-      railties (= 4.2.10)
+      railties (= 4.2.11)
       sprockets-rails
     rails-deprecated_sanitizer (1.0.3)
       activesupport (>= 4.2.0.alpha)
@@ -226,12 +226,12 @@ GEM
     rails-html-sanitizer (1.0.4)
       loofah (~> 2.2, >= 2.2.2)
     rails-perftest (0.0.7)
-    railties (4.2.10)
-      actionpack (= 4.2.10)
-      activesupport (= 4.2.10)
+    railties (4.2.11)
+      actionpack (= 4.2.11)
+      activesupport (= 4.2.11)
       rake (>= 0.8.7)
       thor (>= 0.18.1, < 2.0)
-    rake (12.3.1)
+    rake (12.3.2)
     raphael-rails (2.1.2)
     rb-fsevent (0.10.3)
     rb-inotify (0.9.10)
@@ -286,7 +286,7 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.0)
+    thor (0.20.3)
     thread_safe (0.3.6)
     tilt (2.0.8)
     tzinfo (1.2.5)
@@ -356,4 +356,4 @@ DEPENDENCIES
   wiselinks
 
 BUNDLED WITH
-   1.16.3
+   1.17.2
index 26c85d300ddcb17c8038d31c4d0f8cd1d39aabc9..0b2a22788e6f98537b0f5a3437a2d540a57d47ee 100644 (file)
@@ -119,6 +119,39 @@ class ArvPathMapper(PathMapper):
         else:
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
+    def needs_new_collection(self, srcobj, prefix=""):
+        """Check if files need to be staged into a new collection.
+
+        If all the files are in the same collection and in the same
+        paths they would be staged to, return False.  Otherwise, a new
+        collection is needed with files copied/created in the
+        appropriate places.
+        """
+
+        loc = srcobj["location"]
+        if loc.startswith("_:"):
+            return True
+        if prefix:
+            if loc != prefix+srcobj["basename"]:
+                return True
+        else:
+            i = loc.rfind("/")
+            if i > -1:
+                prefix = loc[:i+1]
+            else:
+                prefix = loc+"/"
+        if srcobj["class"] == "File" and loc not in self._pathmap:
+            return True
+        for s in srcobj.get("secondaryFiles", []):
+            if self.needs_new_collection(s, prefix):
+                return True
+        if srcobj.get("listing"):
+            prefix = "%s%s/" % (prefix, srcobj["basename"])
+            for l in srcobj["listing"]:
+                if self.needs_new_collection(l, prefix):
+                    return True
+        return False
+
     def setup(self, referenced_files, basedir):
         # type: (List[Any], unicode) -> None
         uploadfiles = set()
@@ -169,6 +202,13 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
+                # If all secondary files/directories are located in
+                # the same collection as the primary file and their
+                # paths and names are consistent with staging,
+                # don't create a new collection.
+                if not self.needs_new_collection(srcobj):
+                    continue
+
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )
index f924adbbd615e11f84f4f773058d5fb2c27e8ff8..a6f4022cc3559adb40f7a5891495de2fbfd78c2a 100755 (executable)
@@ -83,7 +83,7 @@ export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token)
 if test "$tag" = "latest" ; then
   arv-keepdocker --pull arvados/jobs $tag
 else
-  jobsimg=\$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
+  jobsimg=\$(curl https://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
   arv-keepdocker --pull arvados/jobs \$jobsimg
   docker tag arvados/jobs:\$jobsimg arvados/jobs:latest
   arv-keepdocker arvados/jobs latest
index fb3c257d93e1be9cac211defc97d3282100ccdbc..b78e89012ad62c5f952476da0553b2d26dac5fd3 100644 (file)
@@ -102,3 +102,132 @@ class TestPathmap(unittest.TestCase):
                 "class": "File",
                 "location": "file:tests/hw.py"
             }], "", "/test/%s", "/test/%s/%s")
+
+    def test_needs_new_collection(self):
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+
+        # Plain file.  Don't need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py"
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # A file that isn't in the pathmap (for some reason).  Need a new collection.
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        self.assertTrue(p.needs_new_collection(a))
+
+        # A file with a secondary file in the same collection.  Don't need
+        # a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is in a different collection from the
+        # primary.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file should be staged under a different name than
+        # the one in its location.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.other"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a directory.  Do not need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "hw",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is a renamed directory.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "wh",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a file literal.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "_:123",
+                "basename": "hw.pyc",
+                "contents": "123"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["_:123"] = True
+        self.assertTrue(p.needs_new_collection(a))
index e6e67d63135e5965157b633400f41980af32ae58..aecf748d10cbe93dc3a41b6030269fc3c02478ad 100644 (file)
@@ -8,37 +8,37 @@ GIT
 GEM
   remote: https://rubygems.org/
   specs:
-    actionmailer (4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
+    actionmailer (4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 1.0, >= 1.0.5)
-    actionpack (4.2.10)
-      actionview (= 4.2.10)
-      activesupport (= 4.2.10)
+    actionpack (4.2.11)
+      actionview (= 4.2.11)
+      activesupport (= 4.2.11)
       rack (~> 1.6)
       rack-test (~> 0.6.2)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (4.2.10)
-      activesupport (= 4.2.10)
+    actionview (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
       erubis (~> 2.7.0)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (4.2.10)
-      activesupport (= 4.2.10)
+    activejob (4.2.11)
+      activesupport (= 4.2.11)
       globalid (>= 0.3.0)
-    activemodel (4.2.10)
-      activesupport (= 4.2.10)
+    activemodel (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
-    activerecord (4.2.10)
-      activemodel (= 4.2.10)
-      activesupport (= 4.2.10)
+    activerecord (4.2.11)
+      activemodel (= 4.2.11)
+      activesupport (= 4.2.11)
       arel (~> 6.0)
     activerecord-deprecated_finders (1.0.4)
-    activesupport (4.2.10)
+    activesupport (4.2.11)
       i18n (~> 0.7)
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
@@ -85,7 +85,7 @@ GEM
       coffee-script-source
       execjs
     coffee-script-source (1.12.2)
-    concurrent-ruby (1.0.5)
+    concurrent-ruby (1.1.4)
     crass (1.0.4)
     curb (0.9.6)
     database_cleaner (1.7.0)
@@ -152,12 +152,12 @@ GEM
     loofah (2.2.3)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
-    mail (2.7.0)
+    mail (2.7.1)
       mini_mime (>= 0.1.1)
     memoist (0.16.0)
     metaclass (0.0.4)
-    mini_mime (1.0.0)
-    mini_portile2 (2.3.0)
+    mini_mime (1.0.1)
+    mini_portile2 (2.4.0)
     minitest (5.11.3)
     mocha (1.5.0)
       metaclass (~> 0.0.1)
@@ -171,8 +171,8 @@ GEM
     net-ssh (4.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nokogiri (1.8.5)
-      mini_portile2 (~> 2.3.0)
+    nokogiri (1.9.1)
+      mini_portile2 (~> 2.4.0)
     oauth2 (1.4.0)
       faraday (>= 0.8, < 0.13)
       jwt (~> 1.0)
@@ -198,16 +198,16 @@ GEM
     rack (1.6.11)
     rack-test (0.6.3)
       rack (>= 1.0)
-    rails (4.2.10)
-      actionmailer (= 4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
-      activemodel (= 4.2.10)
-      activerecord (= 4.2.10)
-      activesupport (= 4.2.10)
+    rails (4.2.11)
+      actionmailer (= 4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
+      activemodel (= 4.2.11)
+      activerecord (= 4.2.11)
+      activesupport (= 4.2.11)
       bundler (>= 1.3.0, < 2.0)
-      railties (= 4.2.10)
+      railties (= 4.2.11)
       sprockets-rails
     rails-deprecated_sanitizer (1.0.3)
       activesupport (>= 4.2.0.alpha)
@@ -219,12 +219,12 @@ GEM
       loofah (~> 2.2, >= 2.2.2)
     rails-observers (0.1.5)
       activemodel (>= 4.0)
-    railties (4.2.10)
-      actionpack (= 4.2.10)
-      activesupport (= 4.2.10)
+    railties (4.2.11)
+      actionpack (= 4.2.11)
+      activesupport (= 4.2.11)
       rake (>= 0.8.7)
       thor (>= 0.18.1, < 2.0)
-    rake (12.3.1)
+    rake (12.3.2)
     ref (2.0.0)
     request_store (1.4.1)
       rack (>= 1.4)
@@ -270,7 +270,7 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.0)
+    thor (0.20.3)
     thread_safe (0.3.6)
     tilt (1.4.1)
     trollop (2.1.2)
@@ -326,4 +326,4 @@ DEPENDENCIES
   uglifier (~> 2.0)
 
 BUNDLED WITH
-   1.16.3
+   1.17.2