21700: Install Bundler system-wide in Rails postinst
[arvados.git] / sdk / java-v2 / src / main / java / org / arvados / client / logic / keep / FileDownloader.java
1 /*
2  * Copyright (C) The Arvados Authors. All rights reserved.
3  *
4  * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
5  *
6  */
7
8 package org.arvados.client.logic.keep;
9
10 import com.google.common.collect.Lists;
11 import org.arvados.client.api.client.CollectionsApiClient;
12 import org.arvados.client.api.client.KeepWebApiClient;
13 import org.arvados.client.api.model.Collection;
14 import org.arvados.client.common.Characters;
15 import org.arvados.client.exception.ArvadosClientException;
16 import org.arvados.client.logic.collection.FileToken;
17 import org.arvados.client.logic.collection.ManifestDecoder;
18 import org.arvados.client.logic.collection.ManifestStream;
19 import org.arvados.client.logic.keep.exception.DownloadFolderAlreadyExistsException;
20 import org.arvados.client.logic.keep.exception.FileAlreadyExistsException;
21 import org.slf4j.Logger;
22
23 import java.io.File;
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.RandomAccessFile;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.concurrent.CompletableFuture;
31 import java.util.stream.Collectors;
32 import java.util.stream.Stream;
33
34 public class FileDownloader {
35
36     private final KeepClient keepClient;
37     private final ManifestDecoder manifestDecoder;
38     private final CollectionsApiClient collectionsApiClient;
39     private final KeepWebApiClient keepWebApiClient;
40     private final Logger log = org.slf4j.LoggerFactory.getLogger(FileDownloader.class);
41
42     public FileDownloader(KeepClient keepClient, ManifestDecoder manifestDecoder, CollectionsApiClient collectionsApiClient, KeepWebApiClient keepWebApiClient) {
43         this.keepClient = keepClient;
44         this.manifestDecoder = manifestDecoder;
45         this.collectionsApiClient = collectionsApiClient;
46         this.keepWebApiClient = keepWebApiClient;
47     }
48
49     public List<FileToken> listFileInfoFromCollection(String collectionUuid) {
50         Collection requestedCollection = collectionsApiClient.get(collectionUuid);
51         String manifestText = requestedCollection.getManifestText();
52
53         // decode manifest text and get list of all FileTokens for this collection
54         return manifestDecoder.decode(manifestText)
55                 .stream()
56                 .flatMap(p -> p.getFileTokens().stream())
57                 .collect(Collectors.toList());
58     }
59
60     public File downloadSingleFileUsingKeepWeb(String filePathName, String collectionUuid, String pathToDownloadFolder) {
61         FileToken fileToken = getFileTokenFromCollection(filePathName, collectionUuid);
62         if (fileToken == null) {
63             throw new ArvadosClientException(String.format("%s not found in Collection with UUID %s", filePathName, collectionUuid));
64         }
65
66         File downloadedFile = checkIfFileExistsInTargetLocation(fileToken, pathToDownloadFolder);
67         try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
68             fos.write(keepWebApiClient.download(collectionUuid, filePathName));
69         } catch (IOException e) {
70             throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
71         }
72         return downloadedFile;
73     }
74
75     public File downloadFileWithResume(String collectionUuid, String fileName, String pathToDownloadFolder, long start, Long end) throws IOException {
76         if (end != null && end < start) {
77             throw new IllegalArgumentException("End index must be greater than or equal to the start index");
78         }
79
80         File destinationFile = new File(pathToDownloadFolder, fileName);
81
82         if (!destinationFile.exists()) {
83             boolean isCreated = destinationFile.createNewFile();
84             if (!isCreated) {
85                 throw new IOException("Failed to create new file: " + destinationFile.getAbsolutePath());
86             }
87         }
88
89         try (RandomAccessFile outputFile = new RandomAccessFile(destinationFile, "rw");
90              InputStream inputStream = keepWebApiClient.get(collectionUuid, fileName, start, end)) {
91             outputFile.seek(start);
92
93             long remaining = (end == null) ? Long.MAX_VALUE : end - start + 1;
94             byte[] buffer = new byte[4096];
95             int bytesRead;
96             while ((bytesRead = inputStream.read(buffer)) != -1 && remaining > 0) {
97                 int bytesToWrite = (int) Math.min(bytesRead, remaining);
98                 outputFile.write(buffer, 0, bytesToWrite);
99                 remaining -= bytesToWrite;
100             }
101         }
102
103         return destinationFile;
104     }
105
106     public List<File> downloadFilesFromCollectionUsingKeepWeb(String collectionUuid, String pathToDownloadFolder) {
107         String collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder).getAbsolutePath();
108         List<FileToken> fileTokens = listFileInfoFromCollection(collectionUuid);
109
110         List<CompletableFuture<File>> futures = Lists.newArrayList();
111         for (FileToken fileToken : fileTokens) {
112             futures.add(CompletableFuture.supplyAsync(() -> this.downloadOneFileFromCollectionUsingKeepWeb(fileToken, collectionUuid, collectionTargetDir)));
113         }
114
115         @SuppressWarnings("unchecked")
116         CompletableFuture<File>[] array = futures.toArray(new CompletableFuture[0]);
117         return Stream.of(array)
118                 .map(CompletableFuture::join).collect(Collectors.toList());
119     }
120
121     private FileToken getFileTokenFromCollection(String filePathName, String collectionUuid) {
122         return listFileInfoFromCollection(collectionUuid)
123                 .stream()
124                 .filter(p -> (p.getFullPath()).equals(filePathName))
125                 .findFirst()
126                 .orElse(null);
127     }
128
129     private File checkIfFileExistsInTargetLocation(FileToken fileToken, String pathToDownloadFolder) {
130         String fileName = fileToken.getFileName();
131
132         File downloadFile = new File(pathToDownloadFolder + Characters.SLASH + fileName);
133         if (downloadFile.exists()) {
134             throw new FileAlreadyExistsException(String.format("File %s exists in location %s", fileName, pathToDownloadFolder));
135         } else {
136             return downloadFile;
137         }
138     }
139
140     private File downloadOneFileFromCollectionUsingKeepWeb(FileToken fileToken, String collectionUuid, String pathToDownloadFolder) {
141         String filePathName = fileToken.getPath() + fileToken.getFileName();
142         File downloadedFile = new File(pathToDownloadFolder + Characters.SLASH + filePathName);
143         downloadedFile.getParentFile().mkdirs();
144
145         try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
146             fos.write(keepWebApiClient.download(collectionUuid, filePathName));
147         } catch (IOException e) {
148             throw new RuntimeException(e);
149         }
150         return downloadedFile;
151     }
152
153     public List<File> downloadFilesFromCollection(String collectionUuid, String pathToDownloadFolder) {
154
155         // download requested collection and extract manifest text
156         Collection requestedCollection = collectionsApiClient.get(collectionUuid);
157         String manifestText = requestedCollection.getManifestText();
158
159         // if directory with this collectionUUID does not exist - create one
160         // if exists - abort (throw exception)
161         File collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder);
162
163         // decode manifest text and create list of ManifestStream objects containing KeepLocators and FileTokens
164         List<ManifestStream> manifestStreams = manifestDecoder.decode(manifestText);
165
166         //list of all downloaded files that will be returned by this method
167         List<File> downloadedFilesFromCollection = new ArrayList<>();
168
169         // download files for each manifest stream
170         for (ManifestStream manifestStream : manifestStreams)
171             downloadedFilesFromCollection.addAll(downloadFilesFromSingleManifestStream(manifestStream, collectionTargetDir));
172
173         log.debug(String.format("Total of: %d files downloaded", downloadedFilesFromCollection.size()));
174         return downloadedFilesFromCollection;
175     }
176
177     private File setTargetDirectory(String collectionUUID, String pathToDownloadFolder) {
178         //local directory to save downloaded files
179         File collectionTargetDir = new File(pathToDownloadFolder + Characters.SLASH + collectionUUID);
180         if (collectionTargetDir.exists()) {
181             throw new DownloadFolderAlreadyExistsException(String.format("Directory for collection UUID %s already exists", collectionUUID));
182         } else {
183             collectionTargetDir.mkdirs();
184         }
185         return collectionTargetDir;
186     }
187
188     private List<File> downloadFilesFromSingleManifestStream(ManifestStream manifestStream, File collectionTargetDir){
189         List<File> downloadedFiles = new ArrayList<>();
190         List<KeepLocator> keepLocators = manifestStream.getKeepLocators();
191         DownloadHelper downloadHelper = new DownloadHelper(keepLocators);
192
193         for (FileToken fileToken : manifestStream.getFileTokens()) {
194             File downloadedFile = new File(collectionTargetDir.getAbsolutePath() + Characters.SLASH + fileToken.getFullPath()); //create file
195             downloadedFile.getParentFile().mkdirs();
196
197             try (FileOutputStream fos = new FileOutputStream(downloadedFile, true)) {
198                 downloadHelper.setBytesToDownload(fileToken.getFileSize()); //update file size info
199
200                 //this part needs to be repeated for each file until whole file is downloaded
201                 do {
202                     downloadHelper.requestNewDataChunk(); //check if new data chunk needs to be downloaded
203                     downloadHelper.writeDownFile(fos); // download data from chunk
204                 } while (downloadHelper.getBytesToDownload() != 0);
205
206             } catch (IOException | ArvadosClientException e) {
207                 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
208             }
209
210             downloadedFiles.add(downloadedFile);
211             log.debug(String.format("File %d / %d downloaded from manifest stream",
212                     manifestStream.getFileTokens().indexOf(fileToken) + 1,
213                     manifestStream.getFileTokens().size()));
214         }
215         return downloadedFiles;
216     }
217
218     private class DownloadHelper {
219
220         // values for tracking file output streams and matching data chunks with initial files
221         int currentDataChunkNumber;
222         int bytesDownloadedFromChunk;
223         long bytesToDownload;
224         byte[] currentDataChunk;
225         boolean remainingDataInChunk;
226         final List<KeepLocator> keepLocators;
227
228         private DownloadHelper(List<KeepLocator> keepLocators) {
229             currentDataChunkNumber = -1;
230             bytesDownloadedFromChunk = 0;
231             remainingDataInChunk = false;
232             this.keepLocators = keepLocators;
233         }
234
235         private long getBytesToDownload() {
236             return bytesToDownload;
237         }
238
239         private void setBytesToDownload(long bytesToDownload) {
240             this.bytesToDownload = bytesToDownload;
241         }
242
243         private void requestNewDataChunk() {
244             if (!remainingDataInChunk) {
245                 currentDataChunkNumber++;
246                 if (currentDataChunkNumber < keepLocators.size()) {
247                     //swap data chunk for next one
248                     currentDataChunk = keepClient.getDataChunk(keepLocators.get(currentDataChunkNumber));
249                     log.debug(String.format("%d of %d data chunks from manifest stream downloaded", currentDataChunkNumber + 1, keepLocators.size()));
250                 } else {
251                     throw new ArvadosClientException("Data chunk required for download is missing.");
252                 }
253             }
254         }
255
256         private void writeDownFile(FileOutputStream fos) throws IOException {
257             //case 1: more bytes needed than available in current chunk (or whole current chunk needed) to download file
258             if (bytesToDownload >= currentDataChunk.length - bytesDownloadedFromChunk) {
259                 writeDownWholeDataChunk(fos);
260             }
261             //case 2: current data chunk contains more bytes than is needed for this file
262             else {
263                 writeDownDataChunkPartially(fos);
264             }
265         }
266
267         private void writeDownWholeDataChunk(FileOutputStream fos) throws IOException {
268             // write all remaining bytes from current chunk
269             fos.write(currentDataChunk, bytesDownloadedFromChunk, currentDataChunk.length - bytesDownloadedFromChunk);
270             //update bytesToDownload
271             bytesToDownload -= (currentDataChunk.length - bytesDownloadedFromChunk);
272             // set remaining data in chunk to false
273             remainingDataInChunk = false;
274             //reset bytesDownloadedFromChunk so that its set to 0 for the next chunk
275             bytesDownloadedFromChunk = 0;
276         }
277
278         private void writeDownDataChunkPartially(FileOutputStream fos) throws IOException {
279             //write all remaining bytes for this file from current chunk
280             fos.write(currentDataChunk, bytesDownloadedFromChunk, (int) bytesToDownload);
281             // update number of bytes downloaded from this chunk
282             bytesDownloadedFromChunk += bytesToDownload;
283             // set remaining data in chunk to true
284             remainingDataInChunk = true;
285             // reset bytesToDownload to exit while loop and move to the next file
286             bytesToDownload = 0;
287         }
288     }
289 }