19699: Add option to defer downloads
authorPeter Amstutz <peter.amstutz@curii.com>
Fri, 4 Nov 2022 01:01:03 +0000 (21:01 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Mon, 14 Nov 2022 16:27:45 +0000 (11:27 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/pathmapper.py

index 9cad9bd00d11eb96f50d2a65be5961de06e4d8c6..e3b8e500a1700db18d01ae8cd0f57a05e31890f8 100644 (file)
@@ -218,6 +218,9 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     parser.add_argument("--http-timeout", type=int,
                         default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
 
+    parser.add_argument("--defer-downloads", action="store_true", default=False,
+                        help="When submitting a workflow, defer downloading HTTP URLs to workflow launch instead of downloading to Keep before submit.")
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--enable-preemptible", dest="enable_preemptible", default=None, action="store_true", help="Use preemptible instances. Control individual steps with arv:UsePreemptible hint.")
     exgroup.add_argument("--disable-preemptible", dest="enable_preemptible", default=None, action="store_false", help="Don't use preemptible instances.")
index 64f85e20763590fd173e57046f471f6e41602ac2..ec473a04feea1071c27cd43eeae35c112edf1733 100644 (file)
@@ -39,6 +39,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.match_local_docker = False
         self.enable_preemptible = None
         self.copy_deps = None
+        self.defer_downloads = False
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 
index a30c0fff5d316c25e867a077819e04f83f5554cc..84a9799f6101cdb4b4566a759a9e0ccebe2442b8 100644 (file)
@@ -206,6 +206,8 @@ The 'jobs' API is no longer supported.
         self.toplevel_runtimeContext.make_fs_access = partial(CollectionFsAccess,
                                                      collection_cache=self.collection_cache)
 
+        self.defer_downloads = arvargs.submit and arvargs.defer_downloads
+
         validate_cluster_target(self, self.toplevel_runtimeContext)
 
 
index 64fdfa0d04032e97235dc581144d9cb74494c597..89364a905fb75d9ce5d258e98243c3a9b5ea05b1 100644 (file)
@@ -105,9 +105,13 @@ class ArvPathMapper(PathMapper):
                     raise WorkflowException("Directory literal '%s' is missing `listing`" % src)
             elif src.startswith("http:") or src.startswith("https:"):
                 try:
-                    keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
-                    logger.info("%s is %s", src, keepref)
-                    self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
+                    if self.arvrunner.defer_downloads:
+                        # passthrough, we'll download it later.
+                        self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True)
+                    else:
+                        keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src)
+                        logger.info("%s is %s", src, keepref)
+                        self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True)
                 except Exception as e:
                     logger.warning(str(e))
             else: