X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/5b970a6c9505527e146e73cb4756a64ecc1679cd..cefddd8efdc88c3d33e5aad4c02e96c64cbe8242:/crunch_scripts/crunchutil/vwd.py diff --git a/crunch_scripts/crunchutil/vwd.py b/crunch_scripts/crunchutil/vwd.py index 5e6f1bec63..3245da14b3 100644 --- a/crunch_scripts/crunchutil/vwd.py +++ b/crunch_scripts/crunchutil/vwd.py @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + import arvados import os import stat @@ -56,7 +60,7 @@ def checkin(target_dir): logger = logging.getLogger("arvados") - caught_error = False + last_error = None for root, dirs, files in os.walk(target_dir): for f in files: try: @@ -73,6 +77,12 @@ def checkin(target_dir): if pdh is not None: # 2. load collection if pdh not in collections: + # 2.1 make sure it is flushed (see #5787 note 11) + fd = os.open(real[0], os.O_RDONLY) + os.fsync(fd) + os.close(fd) + + # 2.2 get collection from API server collections[pdh] = arvados.collection.CollectionReader(pdh, api_client=outputcollection._my_api(), keep_client=outputcollection._my_keep(), @@ -92,6 +102,6 @@ def checkin(target_dir): dat = reader.read(64*1024) except (IOError, OSError) as e: logger.error(e) - caught_error = True + last_error = e - return (outputcollection, caught_error) + return (outputcollection, last_error)