closes #9824
[arvados.git] / sdk / cwl / arvados_cwl / fsaccess.py
1 import fnmatch
2 import os
3 import errno
4
5 import cwltool.stdfsaccess
6 from cwltool.pathmapper import abspath
7
8 import arvados.util
9 import arvados.collection
10 import arvados.arvfile
11
class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
    """Implement the cwltool FsAccess interface for Arvados Collections.

    Paths of the form "keep:<locator>[/<path>]" are resolved against the
    named collection; every other path is delegated to the parent class.
    """

    def __init__(self, basedir, api_client=None):
        """Set up the accessor.

        basedir is handed to the parent class unchanged; api_client is
        passed to every CollectionReader this object creates.
        """
        super(CollectionFsAccess, self).__init__(basedir)
        self.api_client = api_client
        # Cache of CollectionReader objects keyed by locator string, so
        # each collection is only instantiated once per accessor.
        self.collections = {}

    def get_collection(self, path):
        """Split a path into (collection, path inside the collection).

        Returns (CollectionReader, rest) when the first path segment is
        "keep:" followed by something matching
        arvados.util.keep_locator_pattern, where rest is the remaining
        segments joined with "/" (possibly the empty string).  Returns
        (None, path) for anything else.
        """
        p = path.split("/")
        if p[0].startswith("keep:") and arvados.util.keep_locator_pattern.match(p[0][5:]):
            pdh = p[0][5:]
            if pdh not in self.collections:
                self.collections[pdh] = arvados.collection.CollectionReader(pdh, api_client=self.api_client)
            return (self.collections[pdh], "/".join(p[1:]))
        else:
            return (None, path)

    def _match(self, collection, patternsegments, parent):
        """Recursively match glob pattern segments against a collection.

        collection is the directory-like object being searched,
        patternsegments the remaining "/"-separated pattern pieces, and
        parent the path prefix prepended to every match.  Returns a list
        of matching paths; empty when there is nothing left to match or
        when collection is not directory-like.
        """
        if not patternsegments:
            return []

        if not isinstance(collection, arvados.collection.RichCollectionBase):
            return []

        if patternsegments[0] == '.':
            # Pattern contains something like "./foo" so just shift past
            # the "./".  This must happen once, outside the loop below:
            # doing it per entry repeats the whole sub-match for every
            # item in the collection and yields duplicate results.
            return self._match(collection, patternsegments[1:], parent)

        ret = []
        # iterate over the files and subcollections in 'collection'
        for filename in collection:
            if fnmatch.fnmatch(filename, patternsegments[0]):
                cur = os.path.join(parent, filename)
                if len(patternsegments) == 1:
                    ret.append(cur)
                else:
                    ret.extend(self._match(collection[filename], patternsegments[1:], cur))
        return ret

    def glob(self, pattern):
        """Return the list of paths matching pattern.

        A bare "keep:<locator>" pattern matches itself.  Patterns that do
        not name a collection are delegated to the parent class.
        """
        collection, rest = self.get_collection(pattern)
        if collection is None:
            # Not a keep: reference; behave like the other methods and let
            # the parent class handle it instead of dereferencing None.
            return super(CollectionFsAccess, self).glob(pattern)
        if not rest:
            return [pattern]
        patternsegments = rest.split("/")
        return self._match(collection, patternsegments, "keep:" + collection.manifest_locator())

    def open(self, fn, mode):
        """Open fn inside its collection, or via the parent class otherwise."""
        collection, rest = self.get_collection(fn)
        # Compare against None explicitly: collection objects are
        # container-like, so an empty one may evaluate as false.
        if collection is not None:
            return collection.open(rest, mode)
        else:
            return super(CollectionFsAccess, self).open(self._abs(fn), mode)

    def exists(self, fn):
        """Return whether fn exists.

        A bare "keep:<locator>" refers to the collection root, which is
        reported as existing (consistent with isdir()).
        """
        collection, rest = self.get_collection(fn)
        if collection is not None:
            if rest:
                return collection.exists(rest)
            else:
                # Root of the collection; do not pass an empty path on to
                # the collection lookup.
                return True
        else:
            return super(CollectionFsAccess, self).exists(fn)

    def isfile(self, fn):  # type: (unicode) -> bool
        """Return True if fn names a file; a collection root never does."""
        collection, rest = self.get_collection(fn)
        if collection is not None:
            if rest:
                return isinstance(collection.find(rest), arvados.arvfile.ArvadosFile)
            else:
                return False
        else:
            return super(CollectionFsAccess, self).isfile(fn)

    def isdir(self, fn):  # type: (unicode) -> bool
        """Return True if fn names a directory; a collection root always does."""
        collection, rest = self.get_collection(fn)
        if collection is not None:
            if rest:
                # Use the same directory-like base type that _match()
                # recurses into.  NOTE(review): objects returned by find()
                # for sub-directories are presumably not instances of
                # arvados.collection.Collection -- confirm against the SDK.
                return isinstance(collection.find(rest), arvados.collection.RichCollectionBase)
            else:
                return True
        else:
            return super(CollectionFsAccess, self).isdir(fn)

    def listdir(self, fn):  # type: (unicode) -> List[unicode]
        """List the entries of directory fn.

        Each entry is returned as abspath(entry, fn).  Raises IOError with
        errno ENOENT when fn is missing from the collection or is not a
        directory.
        """
        collection, rest = self.get_collection(fn)
        if collection is not None:
            if rest:
                target = collection.find(rest)
            else:
                target = collection
            if target is None:
                raise IOError(errno.ENOENT, "Directory '%s' in '%s' not found" % (rest, collection.portable_data_hash()))
            # Same directory test as isdir() and _match().  The errno is
            # kept as ENOENT so existing callers that inspect it still work.
            if not isinstance(target, arvados.collection.RichCollectionBase):
                raise IOError(errno.ENOENT, "Path '%s' in '%s' is not a Directory" % (rest, collection.portable_data_hash()))
            return [abspath(l, fn) for l in target.keys()]
        else:
            return super(CollectionFsAccess, self).listdir(fn)

    def join(self, path, *paths): # type: (unicode, *unicode) -> unicode
        """Join path components like os.path.join.

        When the last component is itself a "keep:<locator>" reference it
        is returned unchanged, discarding everything before it.
        """
        if paths and paths[-1].startswith("keep:") and arvados.util.keep_locator_pattern.match(paths[-1][5:]):
            return paths[-1]
        return os.path.join(path, *paths)

    def realpath(self, path):
        """Resolve path to a canonical form.

        "$(task.tmpdir)" / "$(task.outdir)" placeholders and keep:
        references are returned untouched; anything else goes through
        os.path.realpath().
        """
        if path.startswith("$(task.tmpdir)") or path.startswith("$(task.outdir)"):
            return path
        collection, rest = self.get_collection(path)
        if collection is not None:
            return path
        else:
            return os.path.realpath(path)