Merge branch '17529-ec2-rate-limit'
[arvados.git] / sdk / python / arvados / commands / put.py
index a21719cabd3afb126f45ca6b78b85ca83c04024d..9596a2dc2d26efcddebbd7921e32ff38ebdb7352 100644 (file)
@@ -77,8 +77,7 @@ Synonym for --stream.
 _group.add_argument('--stream', action='store_true',
                     help="""
 Store the file content and display the resulting manifest on
-stdout. Do not write the manifest to Keep or save a Collection object
-in Arvados.
+stdout. Do not save a Collection object in Arvados.
 """)
 
 _group.add_argument('--as-manifest', action='store_true', dest='manifest',
@@ -236,10 +235,11 @@ Do not save upload state in a cache file for resuming.
 """)
 
 _group = upload_opts.add_mutually_exclusive_group()
-_group.add_argument('--trash-at', metavar='YYYY-MM-DD HH:MM', default=None,
+_group.add_argument('--trash-at', metavar='YYYY-MM-DDTHH:MM', default=None,
                     help="""
 Set the trash date of the resulting collection to an absolute date in the future.
-The accepted format is defined by the ISO 8601 standard.
+The accepted format is defined by the ISO 8601 standard. Examples: 20090103, 2009-01-03, 20090103T181505, 2009-01-03T18:15:05.\n
+Timezone information can be added. If not, the provided date/time is assumed as being in the local system's timezone.
 """)
 _group.add_argument('--trash-after', type=int, metavar='DAYS', default=None,
                     help="""
@@ -486,8 +486,11 @@ class ArvPutUploadJob(object):
         self.exclude_names = exclude_names
         self._trash_at = trash_at
 
-        if self._trash_at is not None and type(self._trash_at) not in [datetime.datetime, datetime.timedelta]:
-            raise TypeError('trash_at should be None, datetime or timedelta')
+        if self._trash_at is not None:
+            if type(self._trash_at) not in [datetime.datetime, datetime.timedelta]:
+                raise TypeError('trash_at should be None, timezone-naive datetime or timedelta')
+            if type(self._trash_at) == datetime.datetime and self._trash_at.tzinfo is not None:
+                raise TypeError('provided trash_at datetime should be timezone-naive')
 
         if not self.use_cache and self.resume:
             raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
@@ -719,6 +722,15 @@ class ArvPutUploadJob(object):
                     self._save_state()
                 except Exception as e:
                     self.logger.error("Unexpected error trying to save cache file: {}".format(e))
+            # Keep remote collection's trash_at attribute synced when using relative expire dates
+            if self._remote_collection is not None and type(self._trash_at) == datetime.timedelta:
+                try:
+                    self._api_client.collections().update(
+                        uuid=self._remote_collection.manifest_locator(),
+                        body={'trash_at': self._collection_trash_at().strftime("%Y-%m-%dT%H:%M:%S.%fZ")}
+                    ).execute(num_retries=self.num_retries)
+                except Exception as e:
+                    self.logger.error("Unexpected error trying to update remote collection's expire date: {}".format(e))
         else:
             self.bytes_written = self.bytes_skipped
         # Call the reporter, if any
@@ -730,7 +742,7 @@ class ArvPutUploadJob(object):
 
     def _write_stdin(self, filename):
         output = self._local_collection.open(filename, 'wb')
-        self._write(sys.stdin, output)
+        self._write(sys.stdin.buffer, output)
         output.close()
 
     def _check_file(self, source, filename):
@@ -854,7 +866,9 @@ class ArvPutUploadJob(object):
                                           update_collection):
             try:
                 self._remote_collection = arvados.collection.Collection(
-                    update_collection, api_client=self._api_client)
+                    update_collection,
+                    api_client=self._api_client,
+                    num_retries=self.num_retries)
             except arvados.errors.ApiError as error:
                 raise CollectionUpdateError("Cannot read collection {} ({})".format(update_collection, error))
             else:
@@ -897,7 +911,8 @@ class ArvPutUploadJob(object):
                 self._state['manifest'],
                 replication_desired=self.replication_desired,
                 put_threads=self.put_threads,
-                api_client=self._api_client)
+                api_client=self._api_client,
+                num_retries=self.num_retries)
 
     def _cached_manifest_valid(self):
         """
@@ -989,6 +1004,9 @@ class ArvPutUploadJob(object):
     def collection_name(self):
         return self._my_collection().api_response()['name'] if self._my_collection().api_response() else None
 
+    def collection_trash_at(self):
+        return self._my_collection().get_trash_at()
+
     def manifest_locator(self):
         return self._my_collection().manifest_locator()
 
@@ -1107,22 +1125,38 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
     # Trash arguments validation
     trash_at = None
     if args.trash_at is not None:
+        # ciso8601 considers YYYYMM as invalid but YYYY-MM as valid, so here we
+        # make sure the user provides a complete YYYY-MM-DD date.
+        if not re.match(r'^\d{4}(?P<dash>-?)\d{2}?(?P=dash)\d{2}', args.trash_at):
+            logger.error("--trash-at argument format invalid, use --help to see examples.")
+            sys.exit(1)
+        # Check if no time information was provided. In that case, assume end-of-day.
+        if re.match(r'^\d{4}(?P<dash>-?)\d{2}?(?P=dash)\d{2}$', args.trash_at):
+            args.trash_at += 'T23:59:59'
         try:
             trash_at = ciso8601.parse_datetime(args.trash_at)
         except:
-            logger.error("--trash-at argument format invalid, should be YYYY-MM-DDTHH:MM.")
+            logger.error("--trash-at argument format invalid, use --help to see examples.")
             sys.exit(1)
         else:
             if trash_at.tzinfo is not None:
-                # Timezone-aware datetime provided, convert to non-aware UTC
-                delta = trash_at.tzinfo.utcoffset(None)
-                trash_at = trash_at.replace(tzinfo=None) - delta
+                # Timezone aware datetime provided.
+                utcoffset = -trash_at.utcoffset()
+            else:
+                # Timezone naive datetime provided. Assume is local.
+                if time.daylight:
+                    utcoffset = datetime.timedelta(seconds=time.altzone)
+                else:
+                    utcoffset = datetime.timedelta(seconds=time.timezone)
+            # Convert to UTC timezone naive datetime.
+            trash_at = trash_at.replace(tzinfo=None) + utcoffset
+
         if trash_at <= datetime.datetime.utcnow():
-            logger.error("--trash-at argument should be set in the future")
+            logger.error("--trash-at argument must be set in the future")
             sys.exit(1)
     if args.trash_after is not None:
         if args.trash_after < 1:
-            logger.error("--trash-after argument should be >= 1")
+            logger.error("--trash-after argument must be >= 1")
             sys.exit(1)
         trash_at = datetime.timedelta(seconds=(args.trash_after * 24 * 60 * 60))
 
@@ -1284,10 +1318,21 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
         output = ','.join(writer.data_locators())
     else:
         try:
+            expiration_notice = ""
+            if writer.collection_trash_at() is not None:
+                # Get the local timezone-naive version, and log it with timezone information.
+                if time.daylight:
+                    local_trash_at = writer.collection_trash_at().replace(tzinfo=None) - datetime.timedelta(seconds=time.altzone)
+                else:
+                    local_trash_at = writer.collection_trash_at().replace(tzinfo=None) - datetime.timedelta(seconds=time.timezone)
+                expiration_notice = ". It will expire on {} {}.".format(
+                    local_trash_at.strftime("%Y-%m-%d %H:%M:%S"), time.strftime("%z"))
             if args.update_collection:
-                logger.info(u"Collection updated: '{}'".format(writer.collection_name()))
+                logger.info(u"Collection updated: '{}'{}".format(
+                    writer.collection_name(), expiration_notice))
             else:
-                logger.info(u"Collection saved as '{}'".format(writer.collection_name()))
+                logger.info(u"Collection saved as '{}'{}".format(
+                    writer.collection_name(), expiration_notice))
             if args.portable_data_hash:
                 output = writer.portable_data_hash()
             else: