12183: Improve error handling if unable to delete symlinks marked for deletion.
[arvados.git] / crunch_scripts / crunchutil / vwd.py
index 5b9edf5e8395a0343a871c17a31aae69a585ba25..3245da14b3e3658f1a6ddc4da68e9960e1b7849d 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import arvados
 import os
 import stat
@@ -40,7 +44,7 @@ def checkin(target_dir):
     Keep as normal files (Keep does not support symlinks).
 
     Symlinks to files in the keep mount will result in files in the new
-    collection which reference existing Keep blocks, no data copying necessay.
+    collection which reference existing Keep blocks, no data copying necessary.
 
     Returns a new Collection object, with data flushed but the collection record
     not saved to the API.
@@ -56,6 +60,7 @@ def checkin(target_dir):
 
     logger = logging.getLogger("arvados")
 
+    last_error = None
     for root, dirs, files in os.walk(target_dir):
         for f in files:
             try:
@@ -72,6 +77,12 @@ def checkin(target_dir):
                     if pdh is not None:
                         # 2. load collection
                         if pdh not in collections:
+                            # 2.1 make sure it is flushed (see #5787 note 11)
+                            fd = os.open(real[0], os.O_RDONLY)
+                            os.fsync(fd)
+                            os.close(fd)
+
+                            # 2.2 get collection from API server
                             collections[pdh] = arvados.collection.CollectionReader(pdh,
                                                                                    api_client=outputcollection._my_api(),
                                                                                    keep_client=outputcollection._my_keep(),
@@ -91,5 +102,6 @@ def checkin(target_dir):
                                 dat = reader.read(64*1024)
             except (IOError, OSError) as e:
                 logger.error(e)
+                last_error = e
 
-    return outputcollection
+    return (outputcollection, last_error)