6587: Documentation to access an arvados VM using webshell. Updated pages such as...
[arvados.git] / crunch_scripts / crunchutil / vwd.py
index 5b9edf5e8395a0343a871c17a31aae69a585ba25..0ae1c4620995014f61d17379bca756d2415f6e4c 100644 (file)
@@ -40,7 +40,7 @@ def checkin(target_dir):
     Keep as normal files (Keep does not support symlinks).
 
     Symlinks to files in the keep mount will result in files in the new
-    collection which reference existing Keep blocks, no data copying necessay.
+    collection which reference existing Keep blocks, no data copying necessary.
 
     Returns a new Collection object, with data flushed but the collection record
     not saved to the API.
@@ -56,6 +56,7 @@ def checkin(target_dir):
 
     logger = logging.getLogger("arvados")
 
+    last_error = None
     for root, dirs, files in os.walk(target_dir):
         for f in files:
             try:
@@ -72,6 +73,12 @@ def checkin(target_dir):
                     if pdh is not None:
                         # 2. load collection
                         if pdh not in collections:
+                            # 2.1 make sure it is flushed (see #5787 note 11)
+                            fd = os.open(real[0], os.O_RDONLY)
+                            os.fsync(fd)
+                            os.close(fd)
+
+                            # 2.2 get collection from API server
                             collections[pdh] = arvados.collection.CollectionReader(pdh,
                                                                                    api_client=outputcollection._my_api(),
                                                                                    keep_client=outputcollection._my_keep(),
@@ -91,5 +98,6 @@ def checkin(target_dir):
                                 dat = reader.read(64*1024)
             except (IOError, OSError) as e:
                 logger.error(e)
+                last_error = e
 
-    return outputcollection
+    return (outputcollection, last_error)