sdk/python/arvados/commands/put.py

   1 #!/usr/bin/env python
   2
   3 # TODO:
   4 # --md5sum - display md5 of each file as read from disk
   5
   6 import argparse
   7 import arvados
   8 import arvados.collection
   9 import base64
  10 import datetime
  11 import errno
  12 import fcntl
  13 import hashlib
  14 import json
  15 import os
  16 import pwd
  17 import time
  18 import signal
  19 import socket
  20 import sys
  21 import tempfile
  22 import threading
  23 import copy
  24 import logging
  25 from apiclient import errors as apiclient_errors
  26 from arvados._version import __version__
  27
  28 import arvados.commands._util as arv_cmd
  29
  30 CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
  31 api_client = None
  32
  33 upload_opts = argparse.ArgumentParser(add_help=False)
  34
  35 upload_opts.add_argument('--version', action='version',
  36                          version="%s %s" % (sys.argv[0], __version__),
  37                          help='Print version and exit.')
  38 upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
  39                          help="""
  40 Local file or directory. Default: read from standard input.
  41 """)
  42
  43 _group = upload_opts.add_mutually_exclusive_group()
  44
  45 _group.add_argument('--max-manifest-depth', type=int, metavar='N',
  46                     default=-1, help="""
  47 Maximum depth of directory tree to represent in the manifest
  48 structure. A directory structure deeper than this will be represented
  49 as a single stream in the manifest. If N=0, the manifest will contain
  50 a single stream. Default: -1 (unlimited), i.e., exactly one manifest
  51 stream per filesystem directory that contains files.
  52 """)
  53
  54 _group.add_argument('--normalize', action='store_true',
  55                     help="""
  56 Normalize the manifest by re-ordering files and streams after writing
  57 data.
  58 """)
  59
  60 _group = upload_opts.add_mutually_exclusive_group()
  61
  62 _group.add_argument('--as-stream', action='store_true', dest='stream',
  63                     help="""
  64 Synonym for --stream.
  65 """)
  66
  67 _group.add_argument('--stream', action='store_true',
  68                     help="""
  69 Store the file content and display the resulting manifest on
  70 stdout. Do not write the manifest to Keep or save a Collection object
  71 in Arvados.
  72 """)
  73
  74 _group.add_argument('--as-manifest', action='store_true', dest='manifest',
  75                     help="""
  76 Synonym for --manifest.
  77 """)
  78
  79 _group.add_argument('--in-manifest', action='store_true', dest='manifest',
  80                     help="""
  81 Synonym for --manifest.
  82 """)
  83
  84 _group.add_argument('--manifest', action='store_true',
  85                     help="""
  86 Store the file data and resulting manifest in Keep, save a Collection
  87 object in Arvados, and display the manifest locator (Collection uuid)
  88 on stdout. This is the default behavior.
  89 """)
  90
  91 _group.add_argument('--as-raw', action='store_true', dest='raw',
  92                     help="""
  93 Synonym for --raw.
  94 """)
  95
  96 _group.add_argument('--raw', action='store_true',
  97                     help="""
  98 Store the file content and display the data block locators on stdout,
  99 separated by commas, with a trailing newline. Do not store a
 100 manifest.
 101 """)
 102
 103 upload_opts.add_argument('--use-filename', type=str, default=None,
 104                          dest='filename', help="""
 105 Synonym for --filename.
 106 """)
 107
 108 upload_opts.add_argument('--filename', type=str, default=None,
 109                          help="""
 110 Use the given filename in the manifest, instead of the name of the
 111 local file. This is useful when "-" or "/dev/stdin" is given as an
 112 input file. It can be used only if there is exactly one path given and
 113 it is not a directory. Implies --manifest.
 114 """)
 115
 116 upload_opts.add_argument('--portable-data-hash', action='store_true',
 117                          help="""
 118 Print the portable data hash instead of the Arvados UUID for the collection
 119 created by the upload.
 120 """)
 121
 122 upload_opts.add_argument('--replication', type=int, metavar='N', default=None,
 123                          help="""
 124 Set the replication level for the new collection: how many different
 125 physical storage devices (e.g., disks) should have a copy of each data
 126 block. Default is to use the server-provided default (if any) or 2.
 127 """)
 128
 129 run_opts = argparse.ArgumentParser(add_help=False)
 130
 131 run_opts.add_argument('--project-uuid', metavar='UUID', help="""
 132 Store the collection in the specified project, instead of your Home
 133 project.
 134 """)
 135
 136 run_opts.add_argument('--name', help="""
 137 Save the collection with the specified name.
 138 """)
 139
 140 _group = run_opts.add_mutually_exclusive_group()
 141 _group.add_argument('--progress', action='store_true',
 142                     help="""
 143 Display human-readable progress on stderr (bytes and, if possible,
 144 percentage of total data size). This is the default behavior when
 145 stderr is a tty.
 146 """)
 147
 148 _group.add_argument('--no-progress', action='store_true',
 149                     help="""
 150 Do not display human-readable progress on stderr, even if stderr is a
 151 tty.
 152 """)
 153
 154 _group.add_argument('--batch-progress', action='store_true',
 155                     help="""
 156 Display machine-readable progress on stderr (bytes and, if known,
 157 total data size).
 158 """)
 159
 160 _group = run_opts.add_mutually_exclusive_group()
 161 _group.add_argument('--resume', action='store_true', default=True,
 162                     help="""
 163 Continue interrupted uploads from cached state (default).
 164 """)
 165 _group.add_argument('--no-resume', action='store_false', dest='resume',
 166                     help="""
 167 Do not continue interrupted uploads from cached state.
 168 """)
 169
 170 arg_parser = argparse.ArgumentParser(
 171     description='Copy data from the local filesystem to Keep.',
 172     parents=[upload_opts, run_opts, arv_cmd.retry_opt])
 173
 174 def parse_arguments(arguments):
 175     args = arg_parser.parse_args(arguments)
 176
 177     if len(args.paths) == 0:
 178         args.paths = ['-']
 179
 180     args.paths = map(lambda x: "-" if x == "/dev/stdin" else x, args.paths)
 181
 182     if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
 183         if args.filename:
 184             arg_parser.error("""
 185     --filename argument cannot be used when storing a directory or
 186     multiple files.
 187     """)
 188
 189     # Turn on --progress by default if stderr is a tty.
 190     if (not (args.batch_progress or args.no_progress)
 191         and os.isatty(sys.stderr.fileno())):
 192         args.progress = True
 193
 194     if args.paths == ['-']:
 195         args.resume = False
 196         if not args.filename:
 197             args.filename = 'stdin'
 198
 199     return args
 200
 201 class ResumeCacheConflict(Exception):
 202     pass
 203
 204
 205 class ResumeCache(object):
 206     CACHE_DIR = '.cache/arvados/arv-put'
 207
 208     def __init__(self, file_spec):
 209         self.cache_file = open(file_spec, 'a+')
 210         self._lock_file(self.cache_file)
 211         self.filename = self.cache_file.name
 212
 213     @classmethod
 214     def make_path(cls, args):
 215         md5 = hashlib.md5()
 216         md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
 217         realpaths = sorted(os.path.realpath(path) for path in args.paths)
 218         md5.update('\0'.join(realpaths))
 219         if any(os.path.isdir(path) for path in realpaths):
 220             md5.update(str(max(args.max_manifest_depth, -1)))
 221         elif args.filename:
 222             md5.update(args.filename)
 223         return os.path.join(
 224             arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
 225             md5.hexdigest())
 226
 227     def _lock_file(self, fileobj):
 228         try:
 229             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
 230         except IOError:
 231             raise ResumeCacheConflict("{} locked".format(fileobj.name))
 232
 233     def load(self):
 234         self.cache_file.seek(0)
 235         return json.load(self.cache_file)
 236
 237     def check_cache(self, api_client=None, num_retries=0):
 238         try:
 239             state = self.load()
 240             locator = None
 241             try:
 242                 if "_finished_streams" in state and len(state["_finished_streams"]) > 0:
 243                     locator = state["_finished_streams"][0][1][0]
 244                 elif "_current_stream_locators" in state and len(state["_current_stream_locators"]) > 0:
 245                     locator = state["_current_stream_locators"][0]
 246                 if locator is not None:
 247                     kc = arvados.keep.KeepClient(api_client=api_client)
 248                     kc.head(locator, num_retries=num_retries)
 249             except Exception as e:
 250                 self.restart()
 251         except (ValueError):
 252             pass
 253
 254     def save(self, data):
 255         try:
 256             new_cache_fd, new_cache_name = tempfile.mkstemp(
 257                 dir=os.path.dirname(self.filename))
 258             self._lock_file(new_cache_fd)
 259             new_cache = os.fdopen(new_cache_fd, 'r+')
 260             json.dump(data, new_cache)
 261             os.rename(new_cache_name, self.filename)
 262         except (IOError, OSError, ResumeCacheConflict) as error:
 263             try:
 264                 os.unlink(new_cache_name)
 265             except NameError:  # mkstemp failed.
 266                 pass
 267         else:
 268             self.cache_file.close()
 269             self.cache_file = new_cache
 270
 271     def close(self):
 272         self.cache_file.close()
 273
 274     def destroy(self):
 275         try:
 276             os.unlink(self.filename)
 277         except OSError as error:
 278             if error.errno != errno.ENOENT:  # That's what we wanted anyway.
 279                 raise
 280         self.close()
 281
 282     def restart(self):
 283         self.destroy()
 284         self.__init__(self.filename)
 285
 286
 287 class ArvPutUploadJob(object):
 288     CACHE_DIR = '.cache/arvados/arv-put'
 289     EMPTY_STATE = {
 290         'manifest' : None, # Last saved manifest checkpoint
 291         'files' : {} # Previous run file list: {path : {size, mtime}}
 292     }
 293
 294     def __init__(self, paths, resume=True, reporter=None, bytes_expected=None,
 295                  name=None, owner_uuid=None, ensure_unique_name=False,
 296                  num_retries=None, replication_desired=None,
 297                  filename=None, update_time=1.0):
 298         self.paths = paths
 299         self.resume = resume
 300         self.reporter = reporter
 301         self.bytes_expected = bytes_expected
 302         self.bytes_written = 0
 303         self.bytes_skipped = 0
 304         self.name = name
 305         self.owner_uuid = owner_uuid
 306         self.ensure_unique_name = ensure_unique_name
 307         self.num_retries = num_retries
 308         self.replication_desired = replication_desired
 309         self.filename = filename
 310         self._state_lock = threading.Lock()
 311         self._state = None # Previous run state (file list & manifest)
 312         self._current_files = [] # Current run file list
 313         self._cache_file = None
 314         self._collection = None
 315         self._collection_lock = threading.Lock()
 316         self._stop_checkpointer = threading.Event()
 317         self._checkpointer = threading.Thread(target=self._update_task)
 318         self._update_task_time = update_time  # How many seconds wait between update runs
 319         self.logger = logging.getLogger('arvados.arv_put')
 320         # Load cached data if any and if needed
 321         self._setup_state()
 322
 323     def start(self):
 324         """
 325         Start supporting thread & file uploading
 326         """
 327         self._checkpointer.daemon = True
 328         self._checkpointer.start()
 329         try:
 330             for path in self.paths:
 331                 # Test for stdin first, in case some file named '-' exist
 332                 if path == '-':
 333                     self._write_stdin(self.filename or 'stdin')
 334                 elif os.path.isdir(path):
 335                     self._write_directory_tree(path)
 336                 else:
 337                     self._write_file(path, self.filename or os.path.basename(path))
 338         finally:
 339             # Stop the thread before doing anything else
 340             self._stop_checkpointer.set()
 341             self._checkpointer.join()
 342             # Commit all & one last _update()
 343             self.manifest_text()
 344             self._update()
 345             if self.resume:
 346                 self._cache_file.close()
 347                 # Correct the final written bytes count
 348                 self.bytes_written -= self.bytes_skipped
 349
 350     def save_collection(self):
 351         with self._collection_lock:
 352             self._my_collection().save_new(
 353                 name=self.name, owner_uuid=self.owner_uuid,
 354                 ensure_unique_name=self.ensure_unique_name,
 355                 num_retries=self.num_retries)
 356
 357     def destroy_cache(self):
 358         if self.resume:
 359             try:
 360                 os.unlink(self._cache_filename)
 361             except OSError as error:
 362                 # That's what we wanted anyway.
 363                 if error.errno != errno.ENOENT:
 364                     raise
 365             self._cache_file.close()
 366
 367     def _collection_size(self, collection):
 368         """
 369         Recursively get the total size of the collection
 370         """
 371         size = 0
 372         for item in collection.values():
 373             if isinstance(item, arvados.collection.Collection) or isinstance(item, arvados.collection.Subcollection):
 374                 size += self._collection_size(item)
 375             else:
 376                 size += item.size()
 377         return size
 378
 379     def _update_task(self):
 380         """
 381         Periodically called support task. File uploading is
 382         asynchronous so we poll status from the collection.
 383         """
 384         while not self._stop_checkpointer.wait(self._update_task_time):
 385             self._update()
 386
 387     def _update(self):
 388         """
 389         Update cached manifest text and report progress.
 390         """
 391         with self._collection_lock:
 392             self.bytes_written = self._collection_size(self._my_collection())
 393             # Update cache, if resume enabled
 394             if self.resume:
 395                 with self._state_lock:
 396                     # Get the manifest text without comitting pending blocks
 397                     self._state['manifest'] = self._my_collection()._get_manifest_text(".", strip=False, normalize=False, only_committed=True)
 398                 self._save_state()
 399         # Call the reporter, if any
 400         self.report_progress()
 401
 402     def report_progress(self):
 403         if self.reporter is not None:
 404             self.reporter(self.bytes_written, self.bytes_expected)
 405
 406     def _write_directory_tree(self, path, stream_name="."):
 407         # TODO: Check what happens when multiple directories are passed as
 408         # arguments.
 409         # If the code below is uncommented, integration test
 410         # test_ArvPutSignedManifest (tests.test_arv_put.ArvPutIntegrationTest)
 411         # fails, I suppose it is because the manifest_uuid changes because
 412         # of the dir addition to stream_name.
 413
 414         # if stream_name == '.':
 415         #     stream_name = os.path.join('.', os.path.basename(path))
 416         for item in os.listdir(path):
 417             if os.path.isdir(os.path.join(path, item)):
 418                 self._write_directory_tree(os.path.join(path, item),
 419                                 os.path.join(stream_name, item))
 420             else:
 421                 self._write_file(os.path.join(path, item),
 422                                 os.path.join(stream_name, item))
 423
 424     def _write_stdin(self, filename):
 425         with self._collection_lock:
 426             output = self._my_collection().open(filename, 'w')
 427         self._write(sys.stdin, output)
 428         output.close()
 429
 430     def _write_file(self, source, filename):
 431         resume_offset = 0
 432         if self.resume:
 433             # Check if file was already uploaded (at least partially)
 434             with self._collection_lock:
 435                 try:
 436                     file_in_collection = self._my_collection().find(filename)
 437                 except IOError:
 438                     # Not found
 439                     file_in_collection = None
 440             # If no previous cached data on this file, store it for an eventual
 441             # repeated run.
 442             if source not in self._state['files']:
 443                 with self._state_lock:
 444                     self._state['files'][source] = {
 445                         'mtime': os.path.getmtime(source),
 446                         'size' : os.path.getsize(source)
 447                     }
 448             with self._state_lock:
 449                 cached_file_data = self._state['files'][source]
 450             # See if this file was already uploaded at least partially
 451             if file_in_collection:
 452                 if cached_file_data['mtime'] == os.path.getmtime(source) and cached_file_data['size'] == os.path.getsize(source):
 453                     if cached_file_data['size'] == file_in_collection.size():
 454                         # File already there, skip it.
 455                         self.bytes_skipped += cached_file_data['size']
 456                         return
 457                     elif cached_file_data['size'] > file_in_collection.size():
 458                         # File partially uploaded, resume!
 459                         resume_offset = file_in_collection.size()
 460                     else:
 461                         # Inconsistent cache, re-upload the file
 462                         self.logger.warning("Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
 463                 else:
 464                     # Local file differs from cached data, re-upload it
 465                     pass
 466         with open(source, 'r') as source_fd:
 467             if resume_offset > 0:
 468                 # Start upload where we left off
 469                 with self._collection_lock:
 470                     output = self._my_collection().open(filename, 'a')
 471                 source_fd.seek(resume_offset)
 472                 self.bytes_skipped += resume_offset
 473             else:
 474                 # Start from scratch
 475                 with self._collection_lock:
 476                     output = self._my_collection().open(filename, 'w')
 477             self._write(source_fd, output)
 478             output.close(flush=False)
 479
 480     def _write(self, source_fd, output):
 481         first_read = True
 482         while True:
 483             data = source_fd.read(arvados.config.KEEP_BLOCK_SIZE)
 484             # Allow an empty file to be written
 485             if not data and not first_read:
 486                 break
 487             if first_read:
 488                 first_read = False
 489             output.write(data)
 490
 491     def _my_collection(self):
 492         """
 493         Create a new collection if none cached. Load it from cache otherwise.
 494         """
 495         if self._collection is None:
 496             with self._state_lock:
 497                 manifest = self._state['manifest']
 498             if self.resume and manifest is not None:
 499                 # Create collection from saved state
 500                 self._collection = arvados.collection.Collection(
 501                     manifest,
 502                     replication_desired=self.replication_desired)
 503             else:
 504                 # Create new collection
 505                 self._collection = arvados.collection.Collection(
 506                     replication_desired=self.replication_desired)
 507         return self._collection
 508
 509     def _setup_state(self):
 510         """
 511         Create a new cache file or load a previously existing one.
 512         """
 513         if self.resume:
 514             md5 = hashlib.md5()
 515             md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
 516             realpaths = sorted(os.path.realpath(path) for path in self.paths)
 517             md5.update('\0'.join(realpaths))
 518             if self.filename:
 519                 md5.update(self.filename)
 520             cache_filename = md5.hexdigest()
 521             self._cache_file = open(os.path.join(
 522                 arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
 523                 cache_filename), 'a+')
 524             self._cache_filename = self._cache_file.name
 525             self._lock_file(self._cache_file)
 526             self._cache_file.seek(0)
 527             with self._state_lock:
 528                 try:
 529                     self._state = json.load(self._cache_file)
 530                     if not set(['manifest', 'files']).issubset(set(self._state.keys())):
 531                         # Cache at least partially incomplete, set up new cache
 532                         self._state = copy.deepcopy(self.EMPTY_STATE)
 533                 except ValueError:
 534                     # Cache file empty, set up new cache
 535                     self._state = copy.deepcopy(self.EMPTY_STATE)
 536             # Load how many bytes were uploaded on previous run
 537             with self._collection_lock:
 538                 self.bytes_written = self._collection_size(self._my_collection())
 539         # No resume required
 540         else:
 541             with self._state_lock:
 542                 self._state = copy.deepcopy(self.EMPTY_STATE)
 543
 544     def _lock_file(self, fileobj):
 545         try:
 546             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
 547         except IOError:
 548             raise ResumeCacheConflict("{} locked".format(fileobj.name))
 549
 550     def _save_state(self):
 551         """
 552         Atomically save current state into cache.
 553         """
 554         try:
 555             with self._state_lock:
 556                 state = self._state
 557             new_cache_fd, new_cache_name = tempfile.mkstemp(
 558                 dir=os.path.dirname(self._cache_filename))
 559             self._lock_file(new_cache_fd)
 560             new_cache = os.fdopen(new_cache_fd, 'r+')
 561             json.dump(state, new_cache)
 562             new_cache.flush()
 563             os.fsync(new_cache)
 564             os.rename(new_cache_name, self._cache_filename)
 565         except (IOError, OSError, ResumeCacheConflict) as error:
 566             self.logger.error("There was a problem while saving the cache file: {}".format(error))
 567             try:
 568                 os.unlink(new_cache_name)
 569             except NameError:  # mkstemp failed.
 570                 pass
 571         else:
 572             self._cache_file.close()
 573             self._cache_file = new_cache
 574
 575     def collection_name(self):
 576         with self._collection_lock:
 577             name = self._my_collection().api_response()['name'] if self._my_collection().api_response() else None
 578         return name
 579
 580     def manifest_locator(self):
 581         with self._collection_lock:
 582             locator = self._my_collection().manifest_locator()
 583         return locator
 584
 585     def portable_data_hash(self):
 586         with self._collection_lock:
 587             datahash = self._my_collection().portable_data_hash()
 588         return datahash
 589
 590     def manifest_text(self, stream_name=".", strip=False, normalize=False):
 591         with self._collection_lock:
 592             manifest = self._my_collection().manifest_text(stream_name, strip, normalize)
 593         return manifest
 594
 595     def _datablocks_on_item(self, item):
 596         """
 597         Return a list of datablock locators, recursively navigating
 598         through subcollections
 599         """
 600         if isinstance(item, arvados.arvfile.ArvadosFile):
 601             if item.size() == 0:
 602                 # Empty file locator
 603                 return ["d41d8cd98f00b204e9800998ecf8427e+0"]
 604             else:
 605                 locators = []
 606                 for segment in item.segments():
 607                     loc = segment.locator
 608                     locators.append(loc)
 609                 return locators
 610         elif isinstance(item, arvados.collection.Collection):
 611             l = [self._datablocks_on_item(x) for x in item.values()]
 612             # Fast list flattener method taken from:
 613             # http://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
 614             return [loc for sublist in l for loc in sublist]
 615         else:
 616             return None
 617
 618     def data_locators(self):
 619         with self._collection_lock:
 620             # Make sure all datablocks are flushed before getting the locators
 621             self._my_collection().manifest_text()
 622             datablocks = self._datablocks_on_item(self._my_collection())
 623         return datablocks
 624
 625
 626 def expected_bytes_for(pathlist):
 627     # Walk the given directory trees and stat files, adding up file sizes,
 628     # so we can display progress as percent
 629     bytesum = 0
 630     for path in pathlist:
 631         if os.path.isdir(path):
 632             for filename in arvados.util.listdir_recursive(path):
 633                 bytesum += os.path.getsize(os.path.join(path, filename))
 634         elif not os.path.isfile(path):
 635             return None
 636         else:
 637             bytesum += os.path.getsize(path)
 638     return bytesum
 639
 640 _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0],
 641                                                             os.getpid())
 642 def machine_progress(bytes_written, bytes_expected):
 643     return _machine_format.format(
 644         bytes_written, -1 if (bytes_expected is None) else bytes_expected)
 645
 646 def human_progress(bytes_written, bytes_expected):
 647     if bytes_expected:
 648         return "\r{}M / {}M {:.1%} ".format(
 649             bytes_written >> 20, bytes_expected >> 20,
 650             float(bytes_written) / bytes_expected)
 651     else:
 652         return "\r{} ".format(bytes_written)
 653
 654 def progress_writer(progress_func, outfile=sys.stderr):
 655     def write_progress(bytes_written, bytes_expected):
 656         outfile.write(progress_func(bytes_written, bytes_expected))
 657     return write_progress
 658
 659 def exit_signal_handler(sigcode, frame):
 660     sys.exit(-sigcode)
 661
 662 def desired_project_uuid(api_client, project_uuid, num_retries):
 663     if not project_uuid:
 664         query = api_client.users().current()
 665     elif arvados.util.user_uuid_pattern.match(project_uuid):
 666         query = api_client.users().get(uuid=project_uuid)
 667     elif arvados.util.group_uuid_pattern.match(project_uuid):
 668         query = api_client.groups().get(uuid=project_uuid)
 669     else:
 670         raise ValueError("Not a valid project UUID: {}".format(project_uuid))
 671     return query.execute(num_retries=num_retries)['uuid']
 672
 673 def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
 674     global api_client
 675
 676     args = parse_arguments(arguments)
 677     status = 0
 678     if api_client is None:
 679         api_client = arvados.api('v1')
 680
 681     # Determine the name to use
 682     if args.name:
 683         if args.stream or args.raw:
 684             print >>stderr, "Cannot use --name with --stream or --raw"
 685             sys.exit(1)
 686         collection_name = args.name
 687     else:
 688         collection_name = "Saved at {} by {}@{}".format(
 689             datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
 690             pwd.getpwuid(os.getuid()).pw_name,
 691             socket.gethostname())
 692
 693     if args.project_uuid and (args.stream or args.raw):
 694         print >>stderr, "Cannot use --project-uuid with --stream or --raw"
 695         sys.exit(1)
 696
 697     # Determine the parent project
 698     try:
 699         project_uuid = desired_project_uuid(api_client, args.project_uuid,
 700                                             args.retries)
 701     except (apiclient_errors.Error, ValueError) as error:
 702         print >>stderr, error
 703         sys.exit(1)
 704
 705     if args.progress:
 706         reporter = progress_writer(human_progress)
 707     elif args.batch_progress:
 708         reporter = progress_writer(machine_progress)
 709     else:
 710         reporter = None
 711
 712     bytes_expected = expected_bytes_for(args.paths)
 713     try:
 714         writer = ArvPutUploadJob(paths = args.paths,
 715                                  resume = args.resume,
 716                                  filename = args.filename,
 717                                  reporter = reporter,
 718                                  bytes_expected = bytes_expected,
 719                                  num_retries = args.retries,
 720                                  replication_desired = args.replication,
 721                                  name = collection_name,
 722                                  owner_uuid = project_uuid,
 723                                  ensure_unique_name = True)
 724     except ResumeCacheConflict:
 725         print >>stderr, "\n".join([
 726             "arv-put: Another process is already uploading this data.",
 727             "         Use --no-resume if this is really what you want."])
 728         sys.exit(1)
 729
 730     # Install our signal handler for each code in CAUGHT_SIGNALS, and save
 731     # the originals.
 732     orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
 733                             for sigcode in CAUGHT_SIGNALS}
 734
 735     if args.resume and writer.bytes_written > 0:
 736         print >>stderr, "\n".join([
 737                 "arv-put: Resuming previous upload from last checkpoint.",
 738                 "         Use the --no-resume option to start over."])
 739
 740     writer.report_progress()
 741     output = None
 742     writer.start()
 743     if args.progress:  # Print newline to split stderr from stdout for humans.
 744         print >>stderr
 745
 746     if args.stream:
 747         if args.normalize:
 748             output = writer.manifest_text(normalize=True)
 749         else:
 750             output = writer.manifest_text()
 751     elif args.raw:
 752         output = ','.join(writer.data_locators())
 753     else:
 754         try:
 755             writer.save_collection()
 756             print >>stderr, "Collection saved as '%s'" % writer.collection_name()
 757             if args.portable_data_hash:
 758                 output = writer.portable_data_hash()
 759             else:
 760                 output = writer.manifest_locator()
 761         except apiclient_errors.Error as error:
 762             print >>stderr, (
 763                 "arv-put: Error creating Collection on project: {}.".format(
 764                     error))
 765             status = 1
 766
 767     # Print the locator (uuid) of the new collection.
 768     if output is None:
 769         status = status or 1
 770     else:
 771         stdout.write(output)
 772         if not output.endswith('\n'):
 773             stdout.write('\n')
 774
 775     for sigcode, orig_handler in orig_signal_handlers.items():
 776         signal.signal(sigcode, orig_handler)
 777
 778     if status != 0:
 779         sys.exit(status)
 780
 781     # Success!
 782     writer.destroy_cache()
 783     return output
 784
 785
 786 if __name__ == '__main__':
 787     main()