Merge branch '18947-githttpd'
[arvados.git] / sdk / cwl / tests / test_pathmapper.py
index e5326147d7737d9f0f0d2e52fda8120a54770ae0..194092db7a0461e4174d12df3cd88c5a32a3cafc 100644 (file)
@@ -9,12 +9,12 @@ import unittest
 import json
 import logging
 import os
-import datetime
 
 import arvados
 import arvados.keep
 import arvados.collection
 import arvados_cwl
+import arvados_cwl.executor
 
 from cwltool.pathmapper import MapperEnt
 from .mock_discovery import get_rootDesc
@@ -27,22 +27,22 @@ def upload_mock(files, api, dry_run=False, num_retries=0, project=None, fnPatter
         c.keepref = "%s/%s" % (pdh, os.path.basename(c.fn))
         c.fn = fnPattern % (pdh, os.path.basename(c.fn))
 
-class MockDateTime(datetime.datetime):
-    @classmethod
-    def now(cls):
-        return datetime.datetime(2018, 1, 1, 0, 0, 0, 0)
-
-datetime.datetime = MockDateTime
-
 class TestPathmap(unittest.TestCase):
     def setUp(self):
         self.api = mock.MagicMock()
         self.api._rootDesc = get_rootDesc()
 
+    def tearDown(self):
+        root_logger = logging.getLogger('')
+
+        # Remove existing RuntimeStatusLoggingHandlers if they exist
+        handlers = [h for h in root_logger.handlers if not isinstance(h, arvados_cwl.executor.RuntimeStatusLoggingHandler)]
+        root_logger.handlers = handlers
+
     def test_keepref(self):
         """Test direct keep references."""
 
-        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
 
         p = ArvPathMapper(arvrunner, [{
             "class": "File",
@@ -57,7 +57,7 @@ class TestPathmap(unittest.TestCase):
     def test_upload(self, statfile, upl):
         """Test pathmapper uploading files."""
 
-        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
 
         def statfile_mock(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)", raiseOSError=False):
             st = arvados.commands.run.UploadFile("", "tests/hw.py")
@@ -78,7 +78,7 @@ class TestPathmap(unittest.TestCase):
     @mock.patch("arvados.commands.run.statfile")
     def test_statfile(self, statfile, upl):
         """Test pathmapper handling ArvFile references."""
-        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
 
         # An ArvFile object returned from arvados.commands.run.statfile means the file is located on a
         # keep mount, so we can construct a direct reference directly without upload.
@@ -100,7 +100,7 @@ class TestPathmap(unittest.TestCase):
     @mock.patch("os.stat")
     def test_missing_file(self, stat):
         """Test pathmapper handling missing references."""
-        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
 
         stat.side_effect = OSError(2, "No such file or directory")
 
@@ -110,18 +110,144 @@ class TestPathmap(unittest.TestCase):
                 "location": "file:tests/hw.py"
             }], "", "/test/%s", "/test/%s/%s")
 
-    def test_get_intermediate_collection_info(self):
-        self.api.containers().current().execute.return_value = {"uuid" : "zzzzz-8i9sb-zzzzzzzzzzzzzzz"}
-        arvrunner = arvados_cwl.ArvCwlRunner(self.api)
-        arvrunner.intermediate_output_ttl = 60
+    def test_needs_new_collection(self):
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
 
-        path_mapper = ArvPathMapper(arvrunner, [{
+        # Plain file.  Don't need a new collection.
+        a = {
             "class": "File",
-            "location": "keep:99999999999999999999999999999991+99/hw.py"
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py"
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # A file that isn't in the pathmap (for some reason).  Need a new collection.
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        self.assertTrue(p.needs_new_collection(a))
+
+        # A file with a secondary file in the same collection.  Don't need
+        # a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is in a different collection from the
+        # a new collectionprimary.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file should be staged to a different name than
+        # path in location.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.other"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a directory.  Do not need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "hw",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is a renamed directory.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "wh",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a file literal.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "_:123",
+                "basename": "hw.pyc",
+                "contents": "123"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["_:123"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+    def test_is_in_collection(self):
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+        self.maxDiff = 1000000
+
+        cwd = os.getcwd()
+        p = ArvPathMapper(arvrunner, [{
+            "class": "File",
+            "location": "file://"+cwd+"/tests/fake-keep-mount/fake_collection_dir/subdir/banana.txt"
         }], "", "/test/%s", "/test/%s/%s")
 
-        info = path_mapper._get_intermediate_collection_info()
-
-        self.assertEqual(info["name"], "Intermediate collection")
-        self.assertEqual(info["trash_at"], datetime.datetime(2018, 1, 1, 0, 1))
-        self.assertEqual(info["properties"], {"type" : "Intermediate", "container" : "zzzzz-8i9sb-zzzzzzzzzzzzzzz"})
+        self.assertEqual({"file://"+cwd+"/tests/fake-keep-mount/fake_collection_dir/subdir/banana.txt": MapperEnt(resolved='keep:99999999999999999999999999999991+99/subdir/banana.txt', target='/test/99999999999999999999999999999991+99/subdir/banana.txt', type='File', staged=True)},
+                         p._pathmap)