2 * Copyright (C) The Arvados Authors. All rights reserved.
4 * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
8 package org.arvados.client.logic.keep;
10 import com.google.common.collect.Lists;
11 import org.arvados.client.api.client.CollectionsApiClient;
12 import org.arvados.client.api.client.KeepWebApiClient;
13 import org.arvados.client.api.model.Collection;
14 import org.arvados.client.common.Characters;
15 import org.arvados.client.exception.ArvadosClientException;
16 import org.arvados.client.logic.collection.FileToken;
17 import org.arvados.client.logic.collection.ManifestDecoder;
18 import org.arvados.client.logic.collection.ManifestStream;
19 import org.arvados.client.logic.keep.exception.DownloadFolderAlreadyExistsException;
20 import org.arvados.client.logic.keep.exception.FileAlreadyExistsException;
21 import org.slf4j.Logger;
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.RandomAccessFile;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.concurrent.CompletableFuture;
31 import java.util.stream.Collectors;
32 import java.util.stream.Stream;
34 public class FileDownloader {
36 private final KeepClient keepClient;
37 private final ManifestDecoder manifestDecoder;
38 private final CollectionsApiClient collectionsApiClient;
39 private final KeepWebApiClient keepWebApiClient;
40 private final Logger log = org.slf4j.LoggerFactory.getLogger(FileDownloader.class);
/**
 * Creates a downloader that works with the supplied collaborators.
 *
 * @param keepClient           used to fetch data chunks by Keep locator
 * @param manifestDecoder      used to decode a collection's manifest text into manifest streams
 * @param collectionsApiClient used to look up collections by UUID
 * @param keepWebApiClient     used to download file content through keep-web
 */
public FileDownloader(KeepClient keepClient,
                      ManifestDecoder manifestDecoder,
                      CollectionsApiClient collectionsApiClient,
                      KeepWebApiClient keepWebApiClient) {
    this.keepWebApiClient = keepWebApiClient;
    this.collectionsApiClient = collectionsApiClient;
    this.manifestDecoder = manifestDecoder;
    this.keepClient = keepClient;
}
49 public List<FileToken> listFileInfoFromCollection(String collectionUuid) {
50 Collection requestedCollection = collectionsApiClient.get(collectionUuid);
51 String manifestText = requestedCollection.getManifestText();
53 // decode manifest text and get list of all FileTokens for this collection
54 return manifestDecoder.decode(manifestText)
56 .flatMap(p -> p.getFileTokens().stream())
57 .collect(Collectors.toList());
60 public File downloadSingleFileUsingKeepWeb(String filePathName, String collectionUuid, String pathToDownloadFolder) {
61 FileToken fileToken = getFileTokenFromCollection(filePathName, collectionUuid);
62 if (fileToken == null) {
63 throw new ArvadosClientException(String.format("%s not found in Collection with UUID %s", filePathName, collectionUuid));
66 File downloadedFile = checkIfFileExistsInTargetLocation(fileToken, pathToDownloadFolder);
67 try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
68 fos.write(keepWebApiClient.download(collectionUuid, filePathName));
69 } catch (IOException e) {
70 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
72 return downloadedFile;
75 public File downloadFileWithResume(String collectionUuid, String fileName, String pathToDownloadFolder, long start, Long end) throws IOException {
76 if (end != null && end < start) {
77 throw new IllegalArgumentException("End index must be greater than or equal to the start index");
80 File destinationFile = new File(pathToDownloadFolder, fileName);
82 if (!destinationFile.exists()) {
83 boolean isCreated = destinationFile.createNewFile();
85 throw new IOException("Failed to create new file: " + destinationFile.getAbsolutePath());
89 try (RandomAccessFile outputFile = new RandomAccessFile(destinationFile, "rw");
90 InputStream inputStream = keepWebApiClient.get(collectionUuid, fileName, start, end)) {
91 outputFile.seek(start);
93 long remaining = (end == null) ? Long.MAX_VALUE : end - start + 1;
94 byte[] buffer = new byte[4096];
96 while ((bytesRead = inputStream.read(buffer)) != -1 && remaining > 0) {
97 int bytesToWrite = (int) Math.min(bytesRead, remaining);
98 outputFile.write(buffer, 0, bytesToWrite);
99 remaining -= bytesToWrite;
103 return destinationFile;
106 public List<File> downloadFilesFromCollectionUsingKeepWeb(String collectionUuid, String pathToDownloadFolder) {
107 String collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder).getAbsolutePath();
108 List<FileToken> fileTokens = listFileInfoFromCollection(collectionUuid);
110 List<CompletableFuture<File>> futures = Lists.newArrayList();
111 for (FileToken fileToken : fileTokens) {
112 futures.add(CompletableFuture.supplyAsync(() -> this.downloadOneFileFromCollectionUsingKeepWeb(fileToken, collectionUuid, collectionTargetDir)));
115 @SuppressWarnings("unchecked")
116 CompletableFuture<File>[] array = futures.toArray(new CompletableFuture[0]);
117 return Stream.of(array)
118 .map(CompletableFuture::join).collect(Collectors.toList());
121 private FileToken getFileTokenFromCollection(String filePathName, String collectionUuid) {
122 return listFileInfoFromCollection(collectionUuid)
124 .filter(p -> (p.getFullPath()).equals(filePathName))
129 private File checkIfFileExistsInTargetLocation(FileToken fileToken, String pathToDownloadFolder) {
130 String fileName = fileToken.getFileName();
132 File downloadFile = new File(pathToDownloadFolder + Characters.SLASH + fileName);
133 if (downloadFile.exists()) {
134 throw new FileAlreadyExistsException(String.format("File %s exists in location %s", fileName, pathToDownloadFolder));
140 private File downloadOneFileFromCollectionUsingKeepWeb(FileToken fileToken, String collectionUuid, String pathToDownloadFolder) {
141 String filePathName = fileToken.getPath() + fileToken.getFileName();
142 File downloadedFile = new File(pathToDownloadFolder + Characters.SLASH + filePathName);
143 downloadedFile.getParentFile().mkdirs();
145 try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
146 fos.write(keepWebApiClient.download(collectionUuid, filePathName));
147 } catch (IOException e) {
148 throw new RuntimeException(e);
150 return downloadedFile;
153 public List<File> downloadFilesFromCollection(String collectionUuid, String pathToDownloadFolder) {
155 // download requested collection and extract manifest text
156 Collection requestedCollection = collectionsApiClient.get(collectionUuid);
157 String manifestText = requestedCollection.getManifestText();
159 // if directory with this collectionUUID does not exist - create one
160 // if exists - abort (throw exception)
161 File collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder);
163 // decode manifest text and create list of ManifestStream objects containing KeepLocators and FileTokens
164 List<ManifestStream> manifestStreams = manifestDecoder.decode(manifestText);
166 //list of all downloaded files that will be returned by this method
167 List<File> downloadedFilesFromCollection = new ArrayList<>();
169 // download files for each manifest stream
170 for (ManifestStream manifestStream : manifestStreams)
171 downloadedFilesFromCollection.addAll(downloadFilesFromSingleManifestStream(manifestStream, collectionTargetDir));
173 log.debug(String.format("Total of: %d files downloaded", downloadedFilesFromCollection.size()));
174 return downloadedFilesFromCollection;
177 private File setTargetDirectory(String collectionUUID, String pathToDownloadFolder) {
178 //local directory to save downloaded files
179 File collectionTargetDir = new File(pathToDownloadFolder + Characters.SLASH + collectionUUID);
180 if (collectionTargetDir.exists()) {
181 throw new DownloadFolderAlreadyExistsException(String.format("Directory for collection UUID %s already exists", collectionUUID));
183 collectionTargetDir.mkdirs();
185 return collectionTargetDir;
188 private List<File> downloadFilesFromSingleManifestStream(ManifestStream manifestStream, File collectionTargetDir){
189 List<File> downloadedFiles = new ArrayList<>();
190 List<KeepLocator> keepLocators = manifestStream.getKeepLocators();
191 DownloadHelper downloadHelper = new DownloadHelper(keepLocators);
193 for (FileToken fileToken : manifestStream.getFileTokens()) {
194 File downloadedFile = new File(collectionTargetDir.getAbsolutePath() + Characters.SLASH + fileToken.getFullPath()); //create file
195 downloadedFile.getParentFile().mkdirs();
197 try (FileOutputStream fos = new FileOutputStream(downloadedFile, true)) {
198 downloadHelper.setBytesToDownload(fileToken.getFileSize()); //update file size info
200 //this part needs to be repeated for each file until whole file is downloaded
202 downloadHelper.requestNewDataChunk(); //check if new data chunk needs to be downloaded
203 downloadHelper.writeDownFile(fos); // download data from chunk
204 } while (downloadHelper.getBytesToDownload() != 0);
206 } catch (IOException | ArvadosClientException e) {
207 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
210 downloadedFiles.add(downloadedFile);
211 log.debug(String.format("File %d / %d downloaded from manifest stream",
212 manifestStream.getFileTokens().indexOf(fileToken) + 1,
213 manifestStream.getFileTokens().size()));
215 return downloadedFiles;
218 private class DownloadHelper {
220 // values for tracking file output streams and matching data chunks with initial files
221 int currentDataChunkNumber;
222 int bytesDownloadedFromChunk;
223 long bytesToDownload;
224 byte[] currentDataChunk;
225 boolean remainingDataInChunk;
226 final List<KeepLocator> keepLocators;
/**
 * Creates a helper that walks through the given Keep locators in order.
 *
 * @param keepLocators locators of the data chunks of one manifest stream
 */
private DownloadHelper(List<KeepLocator> keepLocators) {
    this.keepLocators = keepLocators;
    this.remainingDataInChunk = false;
    this.bytesDownloadedFromChunk = 0;
    // -1 = no chunk fetched yet; the first chunk request advances this to index 0.
    this.currentDataChunkNumber = -1;
}
235 private long getBytesToDownload() {
236 return bytesToDownload;
239 private void setBytesToDownload(long bytesToDownload) {
240 this.bytesToDownload = bytesToDownload;
243 private void requestNewDataChunk() {
244 if (!remainingDataInChunk) {
245 currentDataChunkNumber++;
246 if (currentDataChunkNumber < keepLocators.size()) {
247 //swap data chunk for next one
248 currentDataChunk = keepClient.getDataChunk(keepLocators.get(currentDataChunkNumber));
249 log.debug(String.format("%d of %d data chunks from manifest stream downloaded", currentDataChunkNumber + 1, keepLocators.size()));
251 throw new ArvadosClientException("Data chunk required for download is missing.");
256 private void writeDownFile(FileOutputStream fos) throws IOException {
257 //case 1: more bytes needed than available in current chunk (or whole current chunk needed) to download file
258 if (bytesToDownload >= currentDataChunk.length - bytesDownloadedFromChunk) {
259 writeDownWholeDataChunk(fos);
261 //case 2: current data chunk contains more bytes than is needed for this file
263 writeDownDataChunkPartially(fos);
267 private void writeDownWholeDataChunk(FileOutputStream fos) throws IOException {
268 // write all remaining bytes from current chunk
269 fos.write(currentDataChunk, bytesDownloadedFromChunk, currentDataChunk.length - bytesDownloadedFromChunk);
270 //update bytesToDownload
271 bytesToDownload -= (currentDataChunk.length - bytesDownloadedFromChunk);
272 // set remaining data in chunk to false
273 remainingDataInChunk = false;
274 //reset bytesDownloadedFromChunk so that its set to 0 for the next chunk
275 bytesDownloadedFromChunk = 0;
278 private void writeDownDataChunkPartially(FileOutputStream fos) throws IOException {
279 //write all remaining bytes for this file from current chunk
280 fos.write(currentDataChunk, bytesDownloadedFromChunk, (int) bytesToDownload);
281 // update number of bytes downloaded from this chunk
282 bytesDownloadedFromChunk += bytesToDownload;
283 // set remaining data in chunk to true
284 remainingDataInChunk = true;
285 // reset bytesToDownload to exit while loop and move to the next file