Merge branch '7824-arvls-arvput-collection-api-usage'
author Lucas Di Pentima <lucas@curoverse.com>
Thu, 6 Apr 2017 19:17:43 +0000 (16:17 -0300)
committer Lucas Di Pentima <lucas@curoverse.com>
Thu, 6 Apr 2017 19:17:43 +0000 (16:17 -0300)
Closes #7824

sdk/cli/test/test_arv-keep-get.rb
sdk/python/arvados/commands/get.py [new file with mode: 0755]
sdk/python/arvados/commands/ls.py
sdk/python/bin/arv-get
sdk/python/tests/test_arv_get.py [new file with mode: 0644]
sdk/python/tests/test_arv_ls.py

index 04f454369cc6541be477fe3f585c637f45c7aee9..b1f6bdf857a0fcda9b4f44d4db4778863093067d 100644 (file)
@@ -140,7 +140,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
     end
     assert_equal '', out
-    assert_match /Error:/, err
+    assert_match /ERROR:/, err
   end
 
   def test_nonexistent_manifest
@@ -148,7 +148,7 @@ class TestArvKeepGet < Minitest::Test
       assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
     end
     assert_equal '', out
-    assert_match /Error:/, err
+    assert_match /ERROR:/, err
   end
 
   def test_manifest_root_to_dir
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
new file mode 100755 (executable)
index 0000000..f39e092
--- /dev/null
@@ -0,0 +1,276 @@
+#!/usr/bin/env python
+
+import argparse
+import hashlib
+import os
+import re
+import string
+import sys
+import logging
+
+import arvados
+import arvados.commands._util as arv_cmd
+
+from arvados._version import __version__
+
+api_client = None
+logger = logging.getLogger('arvados.arv-get')
+
+parser = argparse.ArgumentParser(
+    description='Copy data from Keep to a local file or pipe.',
+    parents=[arv_cmd.retry_opt])
+parser.add_argument('--version', action='version',
+                    version="%s %s" % (sys.argv[0], __version__),
+                    help='Print version and exit.')
+parser.add_argument('locator', type=str,
+                    help="""
+Collection locator, optionally with a file path or prefix.
+""")
+parser.add_argument('destination', type=str, nargs='?', default='-',
+                    help="""
+Local file or directory where the data is to be written. Default: stdout.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--progress', action='store_true',
+                   help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when it
+is not expected to interfere with the output: specifically, stderr is
+a tty _and_ either stdout is not a tty, or output is being written to
+named files rather than stdout.
+""")
+group.add_argument('--no-progress', action='store_true',
+                   help="""
+Do not display human-readable progress on stderr.
+""")
+group.add_argument('--batch-progress', action='store_true',
+                   help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--hash',
+                    help="""
+Display the hash of each file as it is read from Keep, using the given
+hash algorithm. Supported algorithms include md5, sha1, sha224,
+sha256, sha384, and sha512.
+""")
+group.add_argument('--md5sum', action='store_const',
+                    dest='hash', const='md5',
+                    help="""
+Display the MD5 hash of each file as it is read from Keep.
+""")
+parser.add_argument('-n', action='store_true',
+                    help="""
+Do not write any data -- just read from Keep, and report md5sums if
+requested.
+""")
+parser.add_argument('-r', action='store_true',
+                    help="""
+Retrieve all files in the specified collection/prefix. This is the
+default behavior if the "locator" argument ends with a forward slash.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('-f', action='store_true',
+                   help="""
+Overwrite existing files while writing. The default behavior is to
+refuse to write *anything* if any of the output files already
+exist. As a special case, -f is not needed to write to stdout.
+""")
+group.add_argument('--skip-existing', action='store_true',
+                   help="""
+Skip files that already exist. The default behavior is to refuse to
+write *anything* if any files exist that would have to be
+overwritten. This option causes even devices, sockets, and fifos to be
+skipped.
+""")
+
+def parse_arguments(arguments, stdout, stderr):
+    args = parser.parse_args(arguments)
+
+    if args.locator[-1] == os.sep:
+        args.r = True
+    if (args.r and
+        not args.n and
+        not (args.destination and
+             os.path.isdir(args.destination))):
+        parser.error('Destination is not a directory.')
+    if not args.r and (os.path.isdir(args.destination) or
+                       args.destination[-1] == os.path.sep):
+        args.destination = os.path.join(args.destination,
+                                        os.path.basename(args.locator))
+        logger.debug("Appended source file name to destination directory: %s",
+                     args.destination)
+
+    if args.destination == '/dev/stdout':
+        args.destination = "-"
+
+    if args.destination == '-':
+        # Normally you have to use -f to write to a file (or device) that
+        # already exists, but "-" and "/dev/stdout" are common enough to
+        # merit a special exception.
+        args.f = True
+    else:
+        args.destination = args.destination.rstrip(os.sep)
+
+    # Turn on --progress by default if stderr is a tty and output is
+    # either going to a named file, or going (via stdout) to something
+    # that isn't a tty.
+    if (not (args.batch_progress or args.no_progress)
+        and stderr.isatty()
+        and (args.destination != '-'
+             or not stdout.isatty())):
+        args.progress = True
+    return args
+
+def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
+    global api_client
+    
+    args = parse_arguments(arguments, stdout, stderr)
+    if api_client is None:
+        api_client = arvados.api('v1')
+
+    r = re.search(r'^(.*?)(/.*)?$', args.locator)
+    collection = r.group(1)
+    get_prefix = r.group(2)
+    if args.r and not get_prefix:
+        get_prefix = os.sep
+    try:
+        reader = arvados.CollectionReader(collection, num_retries=args.retries)
+    except Exception as error:
+        logger.error("failed to read collection: {}".format(error))
+        return 1
+
+    # User asked to download the collection's manifest
+    if not get_prefix:
+        if not args.n:
+            open_flags = os.O_CREAT | os.O_WRONLY
+            if not args.f:
+                open_flags |= os.O_EXCL
+            try:
+                if args.destination == "-":
+                    stdout.write(reader.manifest_text())
+                else:
+                    out_fd = os.open(args.destination, open_flags)
+                    with os.fdopen(out_fd, 'wb') as out_file:
+                        out_file.write(reader.manifest_text())
+            except (IOError, OSError) as error:
+                logger.error("can't write to '{}': {}".format(args.destination, error))
+                return 1
+            except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
+                logger.error("failed to download '{}': {}".format(collection, error))
+                return 1
+        return 0
+
+    # Scan the collection. Make an array of (stream, file, local
+    # destination filename) tuples, and add up total size to extract.
+    todo = []
+    todo_bytes = 0
+    try:
+        if get_prefix == os.sep:
+            item = reader
+        else:
+            item = reader.find('.' + get_prefix)
+
+        if isinstance(item, arvados.collection.Subcollection) or isinstance(item, arvados.collection.CollectionReader):
+            # If the user asked for a file and we got a subcollection, error out.
+            if get_prefix[-1] != os.sep:
+                logger.error("requested file '{}' is in fact a subcollection. Append a trailing '/' to download it.".format('.' + get_prefix))
+                return 1
+            # If the user asked stdout as a destination, error out.
+            elif args.destination == '-':
+                logger.error("cannot use 'stdout' as destination when downloading multiple files.")
+                return 1
+            # User asked for a subcollection, and that's what was found. Add up total size
+            # to download.
+            for s, f in files_in_collection(item):
+                dest_path = os.path.join(
+                    args.destination,
+                    os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
+                if (not (args.n or args.f or args.skip_existing) and
+                    os.path.exists(dest_path)):
+                    logger.error('Local file %s already exists.' % (dest_path,))
+                    return 1
+                todo += [(s, f, dest_path)]
+                todo_bytes += f.size()
+        elif isinstance(item, arvados.arvfile.ArvadosFile):
+            todo += [(item.parent, item, args.destination)]
+            todo_bytes += item.size()
+        else:
+            logger.error("'{}' not found.".format('.' + get_prefix))
+            return 1
+    except (IOError, arvados.errors.NotFoundError) as e:
+        logger.error(e)
+        return 1
+
+    out_bytes = 0
+    for s, f, outfilename in todo:
+        outfile = None
+        digestor = None
+        if not args.n:
+            if outfilename == "-":
+                outfile = stdout
+            else:
+                if args.skip_existing and os.path.exists(outfilename):
+                    logger.debug('Local file %s exists. Skipping.', outfilename)
+                    continue
+                elif not args.f and (os.path.isfile(outfilename) or
+                                   os.path.isdir(outfilename)):
+                    # Good thing we looked again: apparently this file wasn't
+                    # here yet when we checked earlier.
+                    logger.error('Local file %s already exists.' % (outfilename,))
+                    return 1
+                if args.r:
+                    arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+                try:
+                    outfile = open(outfilename, 'wb')
+                except Exception as error:
+                    logger.error('Open(%s) failed: %s' % (outfilename, error))
+                    return 1
+        if args.hash:
+            digestor = hashlib.new(args.hash)
+        try:
+            with s.open(f.name, 'r') as file_reader:
+                for data in file_reader.readall():
+                    if outfile:
+                        outfile.write(data)
+                    if digestor:
+                        digestor.update(data)
+                    out_bytes += len(data)
+                    if args.progress:
+                        stderr.write('\r%d MiB / %d MiB %.1f%%' %
+                                     (out_bytes >> 20,
+                                      todo_bytes >> 20,
+                                      (100
+                                       if todo_bytes==0
+                                       else 100.0*out_bytes/todo_bytes)))
+                    elif args.batch_progress:
+                        stderr.write('%s %d read %d total\n' %
+                                     (sys.argv[0], os.getpid(),
+                                      out_bytes, todo_bytes))
+            if digestor:
+                stderr.write("%s  %s/%s\n"
+                             % (digestor.hexdigest(), s.stream_name(), f.name))
+        except KeyboardInterrupt:
+            if outfile and (outfile.fileno() > 2) and not outfile.closed:
+                os.unlink(outfile.name)
+            break
+        finally:
+            if outfile != None and outfile != stdout:
+                outfile.close()
+
+    if args.progress:
+        stderr.write('\n')
+    return 0
+
+def files_in_collection(c):
+    # Sort first by file type, then alphabetically by file path.
+    for i in sorted(c.keys(),
+                    key=lambda k: (
+                        isinstance(c[k], arvados.collection.Subcollection),
+                        k.upper())):
+        if isinstance(c[i], arvados.arvfile.ArvadosFile):
+            yield (c, c[i])
+        elif isinstance(c[i], arvados.collection.Subcollection):
+            for s, f in files_in_collection(c[i]):
+                yield (s, f)
index a2f2e542754f7e2e44edbd5673cf36d2c5d130af..c6ca0855f70098359f7648694fd807150497cb3e 100755 (executable)
@@ -3,6 +3,9 @@
 from __future__ import print_function
 
 import argparse
+import collections
+import logging
+import re
 import sys
 
 import arvados
@@ -10,13 +13,15 @@ import arvados.commands._util as arv_cmd
 
 from arvados._version import __version__
 
+FileInfo = collections.namedtuple('FileInfo', ['stream_name', 'name', 'size'])
+
 def parse_args(args):
     parser = argparse.ArgumentParser(
         description='List contents of a manifest',
         parents=[arv_cmd.retry_opt])
 
     parser.add_argument('locator', type=str,
-                        help="""Collection UUID or locator""")
+                        help="""Collection UUID or locator, optionally with a subdir path.""")
     parser.add_argument('-s', action='store_true',
                         help="""List file sizes, in KiB.""")
     parser.add_argument('--version', action='version',
@@ -26,25 +31,43 @@ def parse_args(args):
     return parser.parse_args(args)
 
 def size_formatter(coll_file):
-    return "{:>10}".format((coll_file.size() + 1023) / 1024)
+    return "{:>10}".format((coll_file.size + 1023) / 1024)
 
 def name_formatter(coll_file):
-    return "{}/{}".format(coll_file.stream_name(), coll_file.name)
+    return "{}/{}".format(coll_file.stream_name, coll_file.name)
 
-def main(args, stdout, stderr, api_client=None):
+def main(args, stdout, stderr, api_client=None, logger=None):
     args = parse_args(args)
 
     if api_client is None:
         api_client = arvados.api('v1')
 
+    if logger is None:
+        logger = logging.getLogger('arvados.arv-ls')
+
     try:
-        cr = arvados.CollectionReader(args.locator, api_client=api_client,
+        r = re.search(r'^(.*?)(/.*)?$', args.locator)
+        collection = r.group(1)
+        get_prefix = r.group(2)
+
+        cr = arvados.CollectionReader(collection, api_client=api_client,
                                       num_retries=args.retries)
-        cr.normalize()
-    except (arvados.errors.ArgumentError,
+        if get_prefix:
+            if get_prefix[-1] == '/':
+                get_prefix = get_prefix[:-1]
+            stream_name = '.' + get_prefix
+            reader = cr.find(stream_name)
+            if not (isinstance(reader, arvados.CollectionReader) or
+                    isinstance(reader, arvados.collection.Subcollection)):
+                logger.error("'{}' is not a subdirectory".format(get_prefix))
+                return 1
+        else:
+            stream_name = '.'
+            reader = cr
+    except (arvados.errors.ApiError,
+            arvados.errors.ArgumentError,
             arvados.errors.NotFoundError) as error:
-        print("arv-ls: error fetching collection: {}".format(error),
-              file=stderr)
+        logger.error("error fetching collection: {}".format(error))
         return 1
 
     formatters = []
@@ -52,7 +75,21 @@ def main(args, stdout, stderr, api_client=None):
         formatters.append(size_formatter)
     formatters.append(name_formatter)
 
-    for f in cr.all_files():
+    for f in files_in_collection(reader, stream_name):
         print(*(info_func(f) for info_func in formatters), file=stdout)
 
     return 0
+
+def files_in_collection(c, stream_name='.'):
+    # Sort first by file type, then alphabetically by file path.
+    for i in sorted(c.keys(),
+                    key=lambda k: (
+                        isinstance(c[k], arvados.collection.Subcollection),
+                        k.upper())):
+        if isinstance(c[i], arvados.arvfile.ArvadosFile):
+            yield FileInfo(stream_name=stream_name,
+                           name=i,
+                           size=c[i].size())
+        elif isinstance(c[i], arvados.collection.Subcollection):
+            for f in files_in_collection(c[i], "{}/{}".format(stream_name, i)):
+                yield f
index f91b3977090da7c6f8b30844635174d122e67ba2..1c2e552490dcd49ba3fe1d9b893b20329f585fac 100755 (executable)
@@ -1,238 +1,7 @@
 #!/usr/bin/env python
 
-import argparse
-import hashlib
-import os
-import re
-import string
 import sys
-import logging
 
-import arvados
-import arvados.commands._util as arv_cmd
+from arvados.commands.get import main
 
-from arvados._version import __version__
-
-logger = logging.getLogger('arvados.arv-get')
-
-def abort(msg, code=1):
-    print >>sys.stderr, "arv-get:", msg
-    exit(code)
-
-parser = argparse.ArgumentParser(
-    description='Copy data from Keep to a local file or pipe.',
-    parents=[arv_cmd.retry_opt])
-parser.add_argument('--version', action='version',
-                    version="%s %s" % (sys.argv[0], __version__),
-                    help='Print version and exit.')
-parser.add_argument('locator', type=str,
-                    help="""
-Collection locator, optionally with a file path or prefix.
-""")
-parser.add_argument('destination', type=str, nargs='?', default='-',
-                    help="""
-Local file or directory where the data is to be written. Default: stdout.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--progress', action='store_true',
-                   help="""
-Display human-readable progress on stderr (bytes and, if possible,
-percentage of total data size). This is the default behavior when it
-is not expected to interfere with the output: specifically, stderr is
-a tty _and_ either stdout is not a tty, or output is being written to
-named files rather than stdout.
-""")
-group.add_argument('--no-progress', action='store_true',
-                   help="""
-Do not display human-readable progress on stderr.
-""")
-group.add_argument('--batch-progress', action='store_true',
-                   help="""
-Display machine-readable progress on stderr (bytes and, if known,
-total data size).
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--hash',
-                    help="""
-Display the hash of each file as it is read from Keep, using the given
-hash algorithm. Supported algorithms include md5, sha1, sha224,
-sha256, sha384, and sha512.
-""")
-group.add_argument('--md5sum', action='store_const',
-                    dest='hash', const='md5',
-                    help="""
-Display the MD5 hash of each file as it is read from Keep.
-""")
-parser.add_argument('-n', action='store_true',
-                    help="""
-Do not write any data -- just read from Keep, and report md5sums if
-requested.
-""")
-parser.add_argument('-r', action='store_true',
-                    help="""
-Retrieve all files in the specified collection/prefix. This is the
-default behavior if the "locator" argument ends with a forward slash.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('-f', action='store_true',
-                   help="""
-Overwrite existing files while writing. The default behavior is to
-refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to stdout.
-""")
-group.add_argument('--skip-existing', action='store_true',
-                   help="""
-Skip files that already exist. The default behavior is to refuse to
-write *anything* if any files exist that would have to be
-overwritten. This option causes even devices, sockets, and fifos to be
-skipped.
-""")
-
-args = parser.parse_args()
-
-if args.locator[-1] == os.sep:
-    args.r = True
-if (args.r and
-    not args.n and
-    not (args.destination and
-         os.path.isdir(args.destination))):
-    parser.error('Destination is not a directory.')
-if not args.r and (os.path.isdir(args.destination) or
-                   args.destination[-1] == os.path.sep):
-    args.destination = os.path.join(args.destination,
-                                    os.path.basename(args.locator))
-    logger.debug("Appended source file name to destination directory: %s",
-                 args.destination)
-
-if args.destination == '/dev/stdout':
-    args.destination = "-"
-
-if args.destination == '-':
-    # Normally you have to use -f to write to a file (or device) that
-    # already exists, but "-" and "/dev/stdout" are common enough to
-    # merit a special exception.
-    args.f = True
-else:
-    args.destination = args.destination.rstrip(os.sep)
-
-# Turn on --progress by default if stderr is a tty and output is
-# either going to a named file, or going (via stdout) to something
-# that isn't a tty.
-if (not (args.batch_progress or args.no_progress)
-    and sys.stderr.isatty()
-    and (args.destination != '-'
-         or not sys.stdout.isatty())):
-    args.progress = True
-
-
-r = re.search(r'^(.*?)(/.*)?$', args.locator)
-collection = r.group(1)
-get_prefix = r.group(2)
-if args.r and not get_prefix:
-    get_prefix = os.sep
-api_client = arvados.api('v1')
-reader = arvados.CollectionReader(collection, num_retries=args.retries)
-
-if not get_prefix:
-    if not args.n:
-        open_flags = os.O_CREAT | os.O_WRONLY
-        if not args.f:
-            open_flags |= os.O_EXCL
-        try:
-            if args.destination == "-":
-                sys.stdout.write(reader.manifest_text())
-            else:
-                out_fd = os.open(args.destination, open_flags)
-                with os.fdopen(out_fd, 'wb') as out_file:
-                    out_file.write(reader.manifest_text())
-        except (IOError, OSError) as error:
-            abort("can't write to '{}': {}".format(args.destination, error))
-        except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
-            abort("failed to download '{}': {}".format(collection, error))
-    sys.exit(0)
-
-reader.normalize()
-
-# Scan the collection. Make an array of (stream, file, local
-# destination filename) tuples, and add up total size to extract.
-todo = []
-todo_bytes = 0
-try:
-    for s in reader.all_streams():
-        for f in s.all_files():
-            if get_prefix and get_prefix[-1] == os.sep:
-                if 0 != string.find(os.path.join(s.name(), f.name()),
-                                    '.' + get_prefix):
-                    continue
-                if args.destination == "-":
-                    dest_path = "-"
-                else:
-                    dest_path = os.path.join(
-                        args.destination,
-                        os.path.join(s.name(), f.name())[len(get_prefix)+1:])
-                    if (not (args.n or args.f or args.skip_existing) and
-                        os.path.exists(dest_path)):
-                        abort('Local file %s already exists.' % (dest_path,))
-            else:
-                if os.path.join(s.name(), f.name()) != '.' + get_prefix:
-                    continue
-                dest_path = args.destination
-            todo += [(s, f, dest_path)]
-            todo_bytes += f.size()
-except arvados.errors.NotFoundError as e:
-    abort(e)
-
-# Read data, and (if not -n) write to local file(s) or pipe.
-
-out_bytes = 0
-for s,f,outfilename in todo:
-    outfile = None
-    digestor = None
-    if not args.n:
-        if outfilename == "-":
-            outfile = sys.stdout
-        else:
-            if args.skip_existing and os.path.exists(outfilename):
-                logger.debug('Local file %s exists. Skipping.', outfilename)
-                continue
-            elif not args.f and (os.path.isfile(outfilename) or
-                               os.path.isdir(outfilename)):
-                # Good thing we looked again: apparently this file wasn't
-                # here yet when we checked earlier.
-                abort('Local file %s already exists.' % (outfilename,))
-            if args.r:
-                arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
-            try:
-                outfile = open(outfilename, 'wb')
-            except Exception as error:
-                abort('Open(%s) failed: %s' % (outfilename, error))
-    if args.hash:
-        digestor = hashlib.new(args.hash)
-    try:
-        for data in f.readall():
-            if outfile:
-                outfile.write(data)
-            if digestor:
-                digestor.update(data)
-            out_bytes += len(data)
-            if args.progress:
-                sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
-                                 (out_bytes >> 20,
-                                  todo_bytes >> 20,
-                                  (100
-                                   if todo_bytes==0
-                                   else 100.0*out_bytes/todo_bytes)))
-            elif args.batch_progress:
-                sys.stderr.write('%s %d read %d total\n' %
-                                 (sys.argv[0], os.getpid(),
-                                  out_bytes, todo_bytes))
-        if digestor:
-            sys.stderr.write("%s  %s/%s\n"
-                             % (digestor.hexdigest(), s.name(), f.name()))
-    except KeyboardInterrupt:
-        if outfile and (outfile.fileno() > 2) and not outfile.closed:
-            os.unlink(outfile.name)
-        break
-
-if args.progress:
-    sys.stderr.write('\n')
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
diff --git a/sdk/python/tests/test_arv_get.py b/sdk/python/tests/test_arv_get.py
new file mode 100644 (file)
index 0000000..907c671
--- /dev/null
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import shutil
+import tempfile
+
+import arvados
+import arvados.collection as collection
+import arvados.commands.get as arv_get
+import run_test_server
+
+from arvados_testutil import redirected_streams
+
+class ArvadosGetTestCase(run_test_server.TestCaseWithServers):
+    MAIN_SERVER = {}
+    KEEP_SERVER = {}
+
+    def setUp(self):
+        super(ArvadosGetTestCase, self).setUp()
+        self.tempdir = tempfile.mkdtemp()
+        self.col_loc, self.col_pdh, self.col_manifest = self.write_test_collection()
+
+    def tearDown(self):
+        super(ArvadosGetTestCase, self).tearDown()
+        shutil.rmtree(self.tempdir)
+
+    def write_test_collection(self,
+                              contents = {
+                                  'foo.txt' : 'foo',
+                                  'bar.txt' : 'bar',
+                                  'subdir/baz.txt' : 'baz',
+                              }):
+        c = collection.Collection()
+        for path, data in contents.items():
+            with c.open(path, 'w') as f:
+                f.write(data)
+        c.save_new()
+        return (c.manifest_locator(), c.portable_data_hash(), c.manifest_text())
+    
+    def run_get(self, args):
+        self.stdout = io.BytesIO()
+        self.stderr = io.BytesIO()
+        return arv_get.main(args, self.stdout, self.stderr)
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_get(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+
+    def test_get_single_file(self):
+        # Get the file using the collection's locator
+        r = self.run_get(["{}/subdir/baz.txt".format(self.col_loc), '-'])
+        self.assertEqual(0, r)
+        self.assertEqual('baz', self.stdout.getvalue())
+        # Then, try by PDH
+        r = self.run_get(["{}/subdir/baz.txt".format(self.col_pdh), '-'])
+        self.assertEqual(0, r)
+        self.assertEqual('baz', self.stdout.getvalue())        
+
+    def test_get_multiple_files(self):
+        # Download the entire collection to the temp directory
+        r = self.run_get(["{}/".format(self.col_loc), self.tempdir])
+        self.assertEqual(0, r)
+        with open("{}/foo.txt".format(self.tempdir), "r") as f:
+            self.assertEqual("foo", f.read())
+        with open("{}/bar.txt".format(self.tempdir), "r") as f:
+            self.assertEqual("bar", f.read())
+        with open("{}/subdir/baz.txt".format(self.tempdir), "r") as f:
+            self.assertEqual("baz", f.read())
+
+    def test_get_collection_manifest(self):
+        # Get the collection manifest
+        r = self.run_get([self.col_loc, self.tempdir])
+        self.assertEqual(0, r)
+        with open("{}/{}".format(self.tempdir, self.col_loc), "r") as f:
+            self.assertEqual(self.col_manifest, f.read())
+
+    def test_invalid_collection(self):
+        # Asking for an invalid collection should generate an error.
+        r = self.run_get(['this-uuid-seems-to-be-fake', self.tempdir])
+        self.assertNotEqual(0, r)
+
+    def test_invalid_file_request(self):
+        # Asking for a nonexistent file within a collection should generate an error.
+        r = self.run_get(["{}/im-not-here.txt".format(self.col_loc), self.tempdir])
+        self.assertNotEqual(0, r)
+
+    def test_invalid_destination(self):
+        # Asking to place the collection's files in a nonexistent directory
+        # should generate an error.
+        r = self.run_get([self.col_loc, "/fake/subdir/"])
+        self.assertNotEqual(0, r)
+
+    def test_preexistent_destination(self):
+        # Asking to place a file with the same path as a local one should
+        # generate an error and avoid overwrites.
+        with open("{}/foo.txt".format(self.tempdir), "w") as f:
+            f.write("another foo")
+        r = self.run_get(["{}/foo.txt".format(self.col_loc), self.tempdir])
+        self.assertNotEqual(0, r)
+        with open("{}/foo.txt".format(self.tempdir), "r") as f:
+            self.assertEqual("another foo", f.read())
+
index 5064f07d722ee77efc0c8a4f733eaf86d02b8b39..99b551082f8c7399500e9b71f3338050f33fea02 100644 (file)
@@ -35,10 +35,10 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
         api_client.collections().get().execute.return_value = coll_info
         return coll_info, api_client
 
-    def run_ls(self, args, api_client):
+    def run_ls(self, args, api_client, logger=None):
         self.stdout = io.BytesIO()
         self.stderr = io.BytesIO()
-        return arv_ls.main(args, self.stdout, self.stderr, api_client)
+        return arv_ls.main(args, self.stdout, self.stderr, api_client, logger)
 
     def test_plain_listing(self):
         collection, api_client = self.mock_api_for_manifest(
@@ -76,10 +76,13 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
 
     def test_locator_failure(self):
         api_client = mock.MagicMock(name='mock_api_client')
+        error_mock = mock.MagicMock()
+        logger = mock.MagicMock()
+        logger.error = error_mock
         api_client.collections().get().execute.side_effect = (
             arv_error.NotFoundError)
-        self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client))
-        self.assertNotEqual('', self.stderr.getvalue())
+        self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client, logger))
+        self.assertEqual(1, error_mock.call_count)
 
     def test_version_argument(self):
         err = io.BytesIO()