Merge branch '9964-output-glob-acr' refs #9964
[arvados.git] / sdk / python / arvados / commands / ls.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: Apache-2.0
4
5 import argparse
6 import collections
7 import logging
8 import re
9 import sys
10
11 import arvados
12 import arvados.commands._util as arv_cmd
13
14 from arvados._version import __version__
15
# Lightweight record describing one file in a listing: the stream (directory)
# path it lives in, its file name, and its size as reported by the file object.
FileInfo = collections.namedtuple('FileInfo', 'stream_name name size')
17
def parse_args(args):
    """Parse the command-line arguments for the collection listing command.

    Arguments:
      args -- list of argument strings to parse (without the program name).

    Returns the argparse namespace.  It carries ``locator`` and ``s`` from
    the options declared here, plus whatever options the
    ``arv_cmd.retry_opt`` parent parser contributes.
    """
    version_text = "%s %s" % (sys.argv[0], __version__)

    # Each entry is (positional flags, keyword options) for add_argument().
    argument_specs = (
        (('locator',), dict(
            type=str,
            help="""Collection UUID or locator, optionally with a subdir path.""")),
        (('-s',), dict(
            action='store_true',
            help="""List file sizes, in KiB.""")),
        (('--version',), dict(
            action='version',
            version=version_text,
            help='Print version and exit.')),
    )

    parser = argparse.ArgumentParser(
        description='List contents of a manifest',
        parents=[arv_cmd.retry_opt])
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args(args)
32
def size_formatter(coll_file):
    """Return the file's size in KiB, rounded up, right-aligned to width 10.

    Arguments:
      coll_file -- object with a ``size`` attribute (e.g. a FileInfo).
    """
    # Adding 1023 before the floor division rounds any partial KiB upward.
    kib = (coll_file.size + 1023) // 1024
    return format(kib, '>10')
35
def name_formatter(coll_file):
    """Return the file's path: its stream name and file name joined by '/'.

    Arguments:
      coll_file -- object with ``stream_name`` and ``name`` attributes
                   (e.g. a FileInfo).
    """
    return "{0.stream_name}/{0.name}".format(coll_file)
38
def main(args, stdout, stderr, api_client=None, logger=None):
    """List the files of a collection, or of one of its subdirectories.

    Arguments:
      args -- list of command-line argument strings, handed to parse_args().
      stdout -- file-like object that receives one line per listed file.
      stderr -- accepted for interface symmetry; not used by this function.
      api_client -- Arvados API client; one is created when omitted.
      logger -- logger for error messages; defaults to 'arvados.arv-ls'.

    Returns 0 on success, or 1 when the collection cannot be fetched or the
    requested path is not a subdirectory.
    """
    args = parse_args(args)

    if api_client is None:
        api_client = arvados.api('v1', num_retries=args.retries)
    if logger is None:
        logger = logging.getLogger('arvados.arv-ls')

    try:
        # Everything before the first '/' names the collection; the rest
        # (including that '/'), if present, is the path inside it.
        locator_match = re.search(r'^(.*?)(/.*)?$', args.locator)
        collection = locator_match.group(1)
        subdir = locator_match.group(2)

        cr = arvados.CollectionReader(collection, api_client=api_client,
                                      num_retries=args.retries)
        if not subdir:
            stream_name = '.'
            reader = cr
        else:
            # Drop a single trailing slash so "coll/dir/" behaves like "coll/dir".
            if subdir.endswith('/'):
                subdir = subdir[:-1]
            stream_name = '.' + subdir
            reader = cr.find(stream_name)
            directory_types = (arvados.CollectionReader,
                               arvados.collection.Subcollection)
            if not isinstance(reader, directory_types):
                logger.error("'{}' is not a subdirectory".format(subdir))
                return 1
    except (arvados.errors.ApiError,
            arvados.errors.ArgumentError,
            arvados.errors.NotFoundError) as error:
        logger.error("error fetching collection: {}".format(error))
        return 1

    # The size column, when requested, precedes the name column.
    if args.s:
        formatters = [size_formatter, name_formatter]
    else:
        formatters = [name_formatter]

    for file_info in files_in_collection(reader, stream_name):
        columns = [render(file_info) for render in formatters]
        print(*columns, file=stdout)

    return 0
82
def files_in_collection(c, stream_name='.'):
    """Yield a FileInfo for every file in a collection, recursing into subdirs.

    Arguments:
      c -- mapping-like collection object whose values are ArvadosFile or
           Subcollection instances; entries of any other type are skipped.
      stream_name -- path prefix recorded as the stream name of the files
                     found directly in ``c``.

    Within each level, files come before subcollections, and entries of the
    same kind are ordered by their upper-cased name.
    """
    def listing_order(entry_name):
        # False sorts before True, so plain files precede subcollections.
        is_subcollection = isinstance(
            c[entry_name], arvados.collection.Subcollection)
        return (is_subcollection, entry_name.upper())

    for entry_name in sorted(c.keys(), key=listing_order):
        entry = c[entry_name]
        if isinstance(entry, arvados.arvfile.ArvadosFile):
            yield FileInfo(stream_name=stream_name,
                           name=entry_name,
                           size=entry.size())
        elif isinstance(entry, arvados.collection.Subcollection):
            nested_stream = "{}/{}".format(stream_name, entry_name)
            yield from files_in_collection(entry, nested_stream)