X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a310d114bdc06b20cd007e6aff14b409e1c11e32..b9fd7e3f374248a61159e4750a84e38d1c48d5dd:/sdk/cwl/arvados_cwl/fsaccess.py

diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py
index 3744b4a93a..bc2c5e34d7 100644
--- a/sdk/cwl/arvados_cwl/fsaccess.py
+++ b/sdk/cwl/arvados_cwl/fsaccess.py
@@ -63,24 +63,27 @@ class CollectionCache(object):
             del self.collections[pdh]
             self.total -= v[1]
 
-    def get(self, pdh):
+    def get(self, locator):
         with self.lock:
-            if pdh not in self.collections:
-                m = pdh_size.match(pdh)
+            if locator not in self.collections:
+                m = pdh_size.match(locator)
                 if m:
                     self.cap_cache(int(m.group(2)) * 128)
-                logger.debug("Creating collection reader for %s", pdh)
-                cr = arvados.collection.CollectionReader(pdh, api_client=self.api_client,
-                                                         keep_client=self.keep_client,
-                                                         num_retries=self.num_retries)
+                logger.debug("Creating collection reader for %s", locator)
+                try:
+                    cr = arvados.collection.CollectionReader(locator, api_client=self.api_client,
+                                                             keep_client=self.keep_client,
+                                                             num_retries=self.num_retries)
+                except arvados.errors.ApiError as ap:
+                    raise IOError(errno.ENOENT, "Could not access collection '%s': %s" % (locator, str(ap._get_reason())))
                 sz = len(cr.manifest_text()) * 128
-                self.collections[pdh] = (cr, sz)
+                self.collections[locator] = (cr, sz)
                 self.total += sz
             else:
-                cr, sz = self.collections[pdh]
+                cr, sz = self.collections[locator]
                 # bump it to the back
-                del self.collections[pdh]
-                self.collections[pdh] = (cr, sz)
+                del self.collections[locator]
+                self.collections[locator] = (cr, sz)
             return cr
 
 
@@ -94,9 +97,10 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
     def get_collection(self, path):
         sp = path.split("/", 1)
         p = sp[0]
-        if p.startswith("keep:") and arvados.util.keep_locator_pattern.match(p[5:]):
-            pdh = p[5:]
-            return (self.collection_cache.get(pdh), urllib.parse.unquote(sp[1]) if len(sp) == 2 else None)
+        if p.startswith("keep:") and (arvados.util.keep_locator_pattern.match(p[5:]) or
+                                      arvados.util.collection_uuid_pattern.match(p[5:])):
+            locator = p[5:]
+            return (self.collection_cache.get(locator), urllib.parse.unquote(sp[1]) if len(sp) == 2 else None)
         else:
             return (None, path)
 
@@ -129,10 +133,10 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
         patternsegments = rest.split("/")
         return sorted(self._match(collection, patternsegments, "keep:" + collection.manifest_locator()))
 
-    def open(self, fn, mode):
+    def open(self, fn, mode, encoding=None):
         collection, rest = self.get_collection(fn)
         if collection is not None:
-            return collection.open(rest, mode)
+            return collection.open(rest, mode, encoding=encoding)
         else:
             return super(CollectionFsAccess, self).open(self._abs(fn), mode)
 
@@ -221,12 +225,13 @@ class CollectionFetcher(DefaultFetcher):
 
     def fetch_text(self, url):
         if url.startswith("keep:"):
-            with self.fsaccess.open(url, "r") as f:
+            with self.fsaccess.open(url, "r", encoding="utf-8") as f:
                 return f.read()
         if url.startswith("arvwf:"):
             record = self.api_client.workflows().get(uuid=url[6:]).execute(num_retries=self.num_retries)
-            definition = record["definition"] + ('\nlabel: "%s"\n' % record["name"].replace('"', '\\"'))
-            return definition
+            definition = yaml.round_trip_load(record["definition"])
+            definition["label"] = record["name"]
+            return yaml.round_trip_dump(definition)
         return super(CollectionFetcher, self).fetch_text(url)
 
     def check_exists(self, url):
@@ -261,9 +266,11 @@ class CollectionFetcher(DefaultFetcher):
             baseparts = basesp.path.split("/")
             urlparts = urlsp.path.split("/") if urlsp.path else []
 
-            pdh = baseparts.pop(0)
+            locator = baseparts.pop(0)
 
-            if basesp.scheme == "keep" and not arvados.util.keep_locator_pattern.match(pdh):
+            if (basesp.scheme == "keep" and
+                (not arvados.util.keep_locator_pattern.match(locator)) and
+                (not arvados.util.collection_uuid_pattern.match(locator))):
                 raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)
 
             if urlsp.path.startswith("/"):
@@ -273,7 +280,7 @@ class CollectionFetcher(DefaultFetcher):
             if baseparts and urlsp.path:
                 baseparts.pop()
 
-            path = "/".join([pdh] + baseparts + urlparts)
+            path = "/".join([locator] + baseparts + urlparts)
             return urllib.parse.urlunsplit((basesp.scheme, "", path, "", urlsp.fragment))
 
         return super(CollectionFetcher, self).urljoin(base_url, url)