10865: Update documentation, replace disable_api_methods with enable_legacy_jobs_api
[arvados.git] / crunch_scripts / crunchutil / vwd.py
index 5b9edf5e8395a0343a871c17a31aae69a585ba25..3245da14b3e3658f1a6ddc4da68e9960e1b7849d 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import arvados
 import os
 import stat
 import arvados
 import os
 import stat
@@ -40,7 +44,7 @@ def checkin(target_dir):
     Keep as normal files (Keep does not support symlinks).
 
     Symlinks to files in the keep mount will result in files in the new
     Keep as normal files (Keep does not support symlinks).
 
     Symlinks to files in the keep mount will result in files in the new
-    collection which reference existing Keep blocks, no data copying necessay.
+    collection which reference existing Keep blocks, no data copying necessary.
 
     Returns a new Collection object, with data flushed but the collection record
     not saved to the API.
 
     Returns a new Collection object, with data flushed but the collection record
     not saved to the API.
@@ -56,6 +60,7 @@ def checkin(target_dir):
 
     logger = logging.getLogger("arvados")
 
 
     logger = logging.getLogger("arvados")
 
+    last_error = None
     for root, dirs, files in os.walk(target_dir):
         for f in files:
             try:
     for root, dirs, files in os.walk(target_dir):
         for f in files:
             try:
@@ -72,6 +77,12 @@ def checkin(target_dir):
                     if pdh is not None:
                         # 2. load collection
                         if pdh not in collections:
                     if pdh is not None:
                         # 2. load collection
                         if pdh not in collections:
+                            # 2.1 make sure it is flushed (see #5787 note 11)
+                            fd = os.open(real[0], os.O_RDONLY)
+                            os.fsync(fd)
+                            os.close(fd)
+
+                            # 2.2 get collection from API server
                             collections[pdh] = arvados.collection.CollectionReader(pdh,
                                                                                    api_client=outputcollection._my_api(),
                                                                                    keep_client=outputcollection._my_keep(),
                             collections[pdh] = arvados.collection.CollectionReader(pdh,
                                                                                    api_client=outputcollection._my_api(),
                                                                                    keep_client=outputcollection._my_keep(),
@@ -91,5 +102,6 @@ def checkin(target_dir):
                                 dat = reader.read(64*1024)
             except (IOError, OSError) as e:
                 logger.error(e)
                                 dat = reader.read(64*1024)
             except (IOError, OSError) as e:
                 logger.error(e)
+                last_error = e
 
 
-    return outputcollection
+    return (outputcollection, last_error)