2 * Copyright (C) The Arvados Authors. All rights reserved.
4 * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
8 package org.arvados.client.logic.keep;
10 import com.google.common.collect.Lists;
11 import org.arvados.client.api.client.CollectionsApiClient;
12 import org.arvados.client.api.client.KeepWebApiClient;
13 import org.arvados.client.api.model.Collection;
14 import org.arvados.client.common.Characters;
15 import org.arvados.client.exception.ArvadosClientException;
16 import org.arvados.client.logic.collection.FileToken;
17 import org.arvados.client.logic.collection.ManifestDecoder;
18 import org.arvados.client.logic.collection.ManifestStream;
19 import org.arvados.client.logic.keep.exception.DownloadFolderAlreadyExistsException;
20 import org.arvados.client.logic.keep.exception.FileAlreadyExistsException;
21 import org.slf4j.Logger;
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.concurrent.CompletableFuture;
29 import java.util.stream.Collectors;
30 import java.util.stream.Stream;
32 public class FileDownloader {
34 private final KeepClient keepClient;
35 private final ManifestDecoder manifestDecoder;
36 private final CollectionsApiClient collectionsApiClient;
37 private final KeepWebApiClient keepWebApiClient;
38 private final Logger log = org.slf4j.LoggerFactory.getLogger(FileDownloader.class);
/**
 * Creates a downloader that works with the given collaborators.
 *
 * @param keepClient           client used to fetch data chunks by Keep locator
 * @param manifestDecoder      decoder turning manifest text into manifest streams
 * @param collectionsApiClient client used to look up collections by UUID
 * @param keepWebApiClient     client used to download single files of a collection
 */
public FileDownloader(KeepClient keepClient, ManifestDecoder manifestDecoder, CollectionsApiClient collectionsApiClient, KeepWebApiClient keepWebApiClient) {
    this.keepWebApiClient = keepWebApiClient;
    this.collectionsApiClient = collectionsApiClient;
    this.manifestDecoder = manifestDecoder;
    this.keepClient = keepClient;
}
47 public List<FileToken> listFileInfoFromCollection(String collectionUuid) {
48 Collection requestedCollection = collectionsApiClient.get(collectionUuid);
49 String manifestText = requestedCollection.getManifestText();
51 // decode manifest text and get list of all FileTokens for this collection
52 return manifestDecoder.decode(manifestText)
54 .flatMap(p -> p.getFileTokens().stream())
55 .collect(Collectors.toList());
58 public File downloadSingleFileUsingKeepWeb(String filePathName, String collectionUuid, String pathToDownloadFolder) {
59 FileToken fileToken = getFileTokenFromCollection(filePathName, collectionUuid);
60 if (fileToken == null) {
61 throw new ArvadosClientException(String.format("%s not found in Collection with UUID %s", filePathName, collectionUuid));
64 File downloadedFile = checkIfFileExistsInTargetLocation(fileToken, pathToDownloadFolder);
65 try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
66 fos.write(keepWebApiClient.download(collectionUuid, filePathName));
67 } catch (IOException e) {
68 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
70 return downloadedFile;
73 public List<File> downloadFilesFromCollectionUsingKeepWeb(String collectionUuid, String pathToDownloadFolder) {
74 String collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder).getAbsolutePath();
75 List<FileToken> fileTokens = listFileInfoFromCollection(collectionUuid);
77 List<CompletableFuture<File>> futures = Lists.newArrayList();
78 for (FileToken fileToken : fileTokens) {
79 futures.add(CompletableFuture.supplyAsync(() -> this.downloadOneFileFromCollectionUsingKeepWeb(fileToken, collectionUuid, collectionTargetDir)));
82 @SuppressWarnings("unchecked")
83 CompletableFuture<File>[] array = futures.toArray(new CompletableFuture[0]);
84 return Stream.of(array)
85 .map(CompletableFuture::join).collect(Collectors.toList());
88 private FileToken getFileTokenFromCollection(String filePathName, String collectionUuid) {
89 return listFileInfoFromCollection(collectionUuid)
91 .filter(p -> (p.getFullPath()).equals(filePathName))
96 private File checkIfFileExistsInTargetLocation(FileToken fileToken, String pathToDownloadFolder) {
97 String fileName = fileToken.getFileName();
99 File downloadFile = new File(pathToDownloadFolder + Characters.SLASH + fileName);
100 if (downloadFile.exists()) {
101 throw new FileAlreadyExistsException(String.format("File %s exists in location %s", fileName, pathToDownloadFolder));
107 private File downloadOneFileFromCollectionUsingKeepWeb(FileToken fileToken, String collectionUuid, String pathToDownloadFolder) {
108 String filePathName = fileToken.getPath() + fileToken.getFileName();
109 File downloadedFile = new File(pathToDownloadFolder + Characters.SLASH + filePathName);
110 downloadedFile.getParentFile().mkdirs();
112 try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
113 fos.write(keepWebApiClient.download(collectionUuid, filePathName));
114 } catch (IOException e) {
115 throw new RuntimeException(e);
117 return downloadedFile;
120 public List<File> downloadFilesFromCollection(String collectionUuid, String pathToDownloadFolder) {
122 // download requested collection and extract manifest text
123 Collection requestedCollection = collectionsApiClient.get(collectionUuid);
124 String manifestText = requestedCollection.getManifestText();
126 // if directory with this collectionUUID does not exist - create one
127 // if exists - abort (throw exception)
128 File collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder);
130 // decode manifest text and create list of ManifestStream objects containing KeepLocators and FileTokens
131 List<ManifestStream> manifestStreams = manifestDecoder.decode(manifestText);
133 //list of all downloaded files that will be returned by this method
134 List<File> downloadedFilesFromCollection = new ArrayList<>();
136 // download files for each manifest stream
137 for (ManifestStream manifestStream : manifestStreams)
138 downloadedFilesFromCollection.addAll(downloadFilesFromSingleManifestStream(manifestStream, collectionTargetDir));
140 log.debug(String.format("Total of: %d files downloaded", downloadedFilesFromCollection.size()));
141 return downloadedFilesFromCollection;
144 private File setTargetDirectory(String collectionUUID, String pathToDownloadFolder) {
145 //local directory to save downloaded files
146 File collectionTargetDir = new File(pathToDownloadFolder + Characters.SLASH + collectionUUID);
147 if (collectionTargetDir.exists()) {
148 throw new DownloadFolderAlreadyExistsException(String.format("Directory for collection UUID %s already exists", collectionUUID));
150 collectionTargetDir.mkdirs();
152 return collectionTargetDir;
155 private List<File> downloadFilesFromSingleManifestStream(ManifestStream manifestStream, File collectionTargetDir){
156 List<File> downloadedFiles = new ArrayList<>();
157 List<KeepLocator> keepLocators = manifestStream.getKeepLocators();
158 DownloadHelper downloadHelper = new DownloadHelper(keepLocators);
160 for (FileToken fileToken : manifestStream.getFileTokens()) {
161 File downloadedFile = new File(collectionTargetDir.getAbsolutePath() + Characters.SLASH + fileToken.getFullPath()); //create file
162 downloadedFile.getParentFile().mkdirs();
164 try (FileOutputStream fos = new FileOutputStream(downloadedFile, true)) {
165 downloadHelper.setBytesToDownload(fileToken.getFileSize()); //update file size info
167 //this part needs to be repeated for each file until whole file is downloaded
169 downloadHelper.requestNewDataChunk(); //check if new data chunk needs to be downloaded
170 downloadHelper.writeDownFile(fos); // download data from chunk
171 } while (downloadHelper.getBytesToDownload() != 0);
173 } catch (IOException | ArvadosClientException e) {
174 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
177 downloadedFiles.add(downloadedFile);
178 log.debug(String.format("File %d / %d downloaded from manifest stream",
179 manifestStream.getFileTokens().indexOf(fileToken) + 1,
180 manifestStream.getFileTokens().size()));
182 return downloadedFiles;
185 private class DownloadHelper {
187 // values for tracking file output streams and matching data chunks with initial files
188 int currentDataChunkNumber;
189 int bytesDownloadedFromChunk;
190 long bytesToDownload;
191 byte[] currentDataChunk;
192 boolean remainingDataInChunk;
193 final List<KeepLocator> keepLocators;
/**
 * Creates a helper positioned before the first data chunk.
 *
 * @param keepLocators locators of the data chunks to consume, in order
 */
private DownloadHelper(List<KeepLocator> keepLocators) {
    this.keepLocators = keepLocators;
    // -1 so that the first chunk request advances to index 0
    this.currentDataChunkNumber = -1;
    this.remainingDataInChunk = false;
    this.bytesDownloadedFromChunk = 0;
}
202 private long getBytesToDownload() {
203 return bytesToDownload;
206 private void setBytesToDownload(long bytesToDownload) {
207 this.bytesToDownload = bytesToDownload;
210 private void requestNewDataChunk() {
211 if (!remainingDataInChunk) {
212 currentDataChunkNumber++;
213 if (currentDataChunkNumber < keepLocators.size()) {
214 //swap data chunk for next one
215 currentDataChunk = keepClient.getDataChunk(keepLocators.get(currentDataChunkNumber));
216 log.debug(String.format("%d of %d data chunks from manifest stream downloaded", currentDataChunkNumber + 1, keepLocators.size()));
218 throw new ArvadosClientException("Data chunk required for download is missing.");
223 private void writeDownFile(FileOutputStream fos) throws IOException {
224 //case 1: more bytes needed than available in current chunk (or whole current chunk needed) to download file
225 if (bytesToDownload >= currentDataChunk.length - bytesDownloadedFromChunk) {
226 writeDownWholeDataChunk(fos);
228 //case 2: current data chunk contains more bytes than is needed for this file
230 writeDownDataChunkPartially(fos);
234 private void writeDownWholeDataChunk(FileOutputStream fos) throws IOException {
235 // write all remaining bytes from current chunk
236 fos.write(currentDataChunk, bytesDownloadedFromChunk, currentDataChunk.length - bytesDownloadedFromChunk);
237 //update bytesToDownload
238 bytesToDownload -= (currentDataChunk.length - bytesDownloadedFromChunk);
239 // set remaining data in chunk to false
240 remainingDataInChunk = false;
241 //reset bytesDownloadedFromChunk so that its set to 0 for the next chunk
242 bytesDownloadedFromChunk = 0;
245 private void writeDownDataChunkPartially(FileOutputStream fos) throws IOException {
246 //write all remaining bytes for this file from current chunk
247 fos.write(currentDataChunk, bytesDownloadedFromChunk, (int) bytesToDownload);
248 // update number of bytes downloaded from this chunk
249 bytesDownloadedFromChunk += bytesToDownload;
250 // set remaining data in chunk to true
251 remainingDataInChunk = true;
252 // reset bytesToDownload to exit while loop and move to the next file