Merge branch 'master' into 11898-no-distinct
[arvados.git] / sdk / cwl / arvados_cwl / fsaccess.py
index 34d9cea3a4ad34dedcdd68dfc4e69811bb4d6886..08e203b87908aa13d702ee983b1c39617a9ca8a2 100644 (file)
@@ -1,9 +1,14 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import fnmatch
 import os
 import errno
 import urlparse
 import re
 import logging
+import threading
 
 import ruamel.yaml as yaml
 
@@ -25,13 +30,15 @@ class CollectionCache(object):
         self.api_client = api_client
         self.keep_client = keep_client
         self.collections = {}
+        self.lock = threading.Lock()
 
     def get(self, pdh):
-        if pdh not in self.collections:
-            logger.debug("Creating collection reader for %s", pdh)
-            self.collections[pdh] = arvados.collection.CollectionReader(pdh, api_client=self.api_client,
-                                                                        keep_client=self.keep_client)
-        return self.collections[pdh]
+        with self.lock:
+            if pdh not in self.collections:
+                logger.debug("Creating collection reader for %s", pdh)
+                self.collections[pdh] = arvados.collection.CollectionReader(pdh, api_client=self.api_client,
+                                                                            keep_client=self.keep_client)
+            return self.collections[pdh]
 
 
 class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
@@ -88,7 +95,7 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
 
     def exists(self, fn):
         collection, rest = self.get_collection(fn)
-        if collection:
+        if collection is not None:
             if rest:
                 return collection.exists(rest)
             else:
@@ -146,13 +153,16 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
             return os.path.realpath(path)
 
 class CollectionFetcher(DefaultFetcher):
-    def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4):
+    def __init__(self, cache, session, api_client=None, fs_access=None, num_retries=4, overrides=None):
         super(CollectionFetcher, self).__init__(cache, session)
         self.api_client = api_client
         self.fsaccess = fs_access
         self.num_retries = num_retries
+        self.overrides = overrides if overrides else {}
 
     def fetch_text(self, url):
+        if url in self.overrides:
+            return self.overrides[url]
         if url.startswith("keep:"):
             with self.fsaccess.open(url, "r") as f:
                 return f.read()
@@ -163,6 +173,8 @@ class CollectionFetcher(DefaultFetcher):
         return super(CollectionFetcher, self).fetch_text(url)
 
     def check_exists(self, url):
+        if url in self.overrides:
+            return True
         try:
             if url.startswith("http://arvados.org/cwl"):
                 return True
@@ -215,6 +227,9 @@ workflow_uuid_pattern = re.compile(r'[a-z0-9]{5}-7fd4e-[a-z0-9]{15}')
 pipeline_template_uuid_pattern = re.compile(r'[a-z0-9]{5}-p5p6p-[a-z0-9]{15}')
 
 def collectionResolver(api_client, document_loader, uri, num_retries=4):
+    if uri.startswith("keep:") or uri.startswith("arvwf:"):
+        return uri
+
     if workflow_uuid_pattern.match(uri):
         return "arvwf:%s#main" % (uri)