- s = os.lstat(os.path.join(root, f))
- if stat.S_ISLNK(s.st_mode):
- # 1. check if it is a link into a collection
- real = os.path.split(os.path.realpath(os.path.join(root, f)))
- (pdh, branch) = arvados.command.run.is_in_collection(real[0], real[1])
- if pdh is not None:
- # 2. load collection
- if pdh not in collections:
- collections[pdh] = arvados.collection.CollectionReader(pdh,
- api_client=outputcollection._my_api(),
- keep_client=outputcollection._my_keep(),
- num_retries=5)
- # 3. copy arvfile to new collection
- outputcollection.copy(branch, branch, source_collection=collections[pdh])
-
- elif stat.S_ISREG(s.st_mode):
- reldir = root[len(target_dir):]
- with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
- with open(os.path.join(root, f), "rb") as reader:
- dat = reader.read(64*1024)
- while dat:
- writer.write(dat)
+ try:
+ s = os.lstat(os.path.join(root, f))
+
+ writeIt = False
+
+ if stat.S_ISREG(s.st_mode):
+ writeIt = True
+ elif stat.S_ISLNK(s.st_mode):
+ # 1. check if it is a link into a collection
+ real = os.path.split(os.path.realpath(os.path.join(root, f)))
+ (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
+ if pdh is not None:
+ # 2. load collection
+ if pdh not in collections:
+ # 2.1 make sure it is flushed (see #5787 note 11)
+ fd = os.open(real[0], os.O_RDONLY)
+ os.fsync(fd)
+ os.close(fd)
+
+ # 2.2 get collection from API server
+ collections[pdh] = arvados.collection.CollectionReader(pdh,
+ api_client=outputcollection._my_api(),
+ keep_client=outputcollection._my_keep(),
+ num_retries=5)
+ # 3. copy arvfile to new collection
+ outputcollection.copy(branch, os.path.join(root[len(target_dir):], f), source_collection=collections[pdh])
+ else:
+ writeIt = True
+
+ if writeIt:
+ reldir = root[len(target_dir):]
+ with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
+ with open(os.path.join(root, f), "rb") as reader: