X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0a87aad48d7fccfc4d7d56a8628370cb7370d792..928b996a2e1b1780eb760c8675edc315dd9d80df:/crunch_scripts/crunchutil/vwd.py diff --git a/crunch_scripts/crunchutil/vwd.py b/crunch_scripts/crunchutil/vwd.py index 5b9edf5e83..3245da14b3 100644 --- a/crunch_scripts/crunchutil/vwd.py +++ b/crunch_scripts/crunchutil/vwd.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + import arvados import os import stat @@ -40,7 +44,7 @@ def checkin(target_dir): Keep as normal files (Keep does not support symlinks). Symlinks to files in the keep mount will result in files in the new - collection which reference existing Keep blocks, no data copying necessay. + collection which reference existing Keep blocks, no data copying necessary. Returns a new Collection object, with data flushed but the collection record not saved to the API. @@ -56,6 +60,7 @@ def checkin(target_dir): logger = logging.getLogger("arvados") + last_error = None for root, dirs, files in os.walk(target_dir): for f in files: try: @@ -72,6 +77,12 @@ def checkin(target_dir): if pdh is not None: # 2. load collection if pdh not in collections: + # 2.1 make sure it is flushed (see #5787 note 11) + fd = os.open(real[0], os.O_RDONLY) + os.fsync(fd) + os.close(fd) + + # 2.2 get collection from API server collections[pdh] = arvados.collection.CollectionReader(pdh, api_client=outputcollection._my_api(), keep_client=outputcollection._my_keep(), @@ -91,5 +102,6 @@ def checkin(target_dir): dat = reader.read(64*1024) except (IOError, OSError) as e: logger.error(e) + last_error = e - return outputcollection + return (outputcollection, last_error)