From 34f3229a26dcc9d9b6c94e207eb8f58bb6555acb Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 3 Nov 2022 21:01:03 -0400 Subject: [PATCH] 19699: Add option to defer downloads Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/__init__.py | 3 +++ sdk/cwl/arvados_cwl/context.py | 1 + sdk/cwl/arvados_cwl/executor.py | 2 ++ sdk/cwl/arvados_cwl/pathmapper.py | 10 +++++++--- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index 9cad9bd00d..e3b8e500a1 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -218,6 +218,9 @@ def arg_parser(): # type: () -> argparse.ArgumentParser parser.add_argument("--http-timeout", type=int, default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).") + parser.add_argument("--defer-downloads", action="store_true", default=False, + help="When submitting a workflow, defer downloading HTTP URLs to workflow launch instead of downloading to Keep before submit.") + exgroup = parser.add_mutually_exclusive_group() exgroup.add_argument("--enable-preemptible", dest="enable_preemptible", default=None, action="store_true", help="Use preemptible instances. Control individual steps with arv:UsePreemptible hint.") exgroup.add_argument("--disable-preemptible", dest="enable_preemptible", default=None, action="store_false", help="Don't use preemptible instances.") diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py index 64f85e2076..ec473a04fe 100644 --- a/sdk/cwl/arvados_cwl/context.py +++ b/sdk/cwl/arvados_cwl/context.py @@ -39,6 +39,7 @@ class ArvRuntimeContext(RuntimeContext): self.match_local_docker = False self.enable_preemptible = None self.copy_deps = None + self.defer_downloads = False super(ArvRuntimeContext, self).__init__(kwargs) diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py index a30c0fff5d..84a9799f61 100644 --- a/sdk/cwl/arvados_cwl/executor.py +++ b/sdk/cwl/arvados_cwl/executor.py @@ -206,6 +206,8 @@ The 'jobs' API is no longer supported. self.toplevel_runtimeContext.make_fs_access = partial(CollectionFsAccess, collection_cache=self.collection_cache) + self.defer_downloads = arvargs.submit and arvargs.defer_downloads + validate_cluster_target(self, self.toplevel_runtimeContext) diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py index 64fdfa0d04..89364a905f 100644 --- a/sdk/cwl/arvados_cwl/pathmapper.py +++ b/sdk/cwl/arvados_cwl/pathmapper.py @@ -105,9 +105,13 @@ class ArvPathMapper(PathMapper): raise WorkflowException("Directory literal '%s' is missing `listing`" % src) elif src.startswith("http:") or src.startswith("https:"): try: - keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src) - logger.info("%s is %s", src, keepref) - self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) + if self.arvrunner.defer_downloads: + # passthrough, we'll download it later. + self._pathmap[src] = MapperEnt(src, src, srcobj["class"], True) + else: + keepref = http_to_keep(self.arvrunner.api, self.arvrunner.project_uuid, src) + logger.info("%s is %s", src, keepref) + self._pathmap[src] = MapperEnt(keepref, keepref, srcobj["class"], True) except Exception as e: logger.warning(str(e)) else: -- 2.30.2