Tweak test script no issue #
[arvados.git] / src / main / java / org / arvados / client / logic / keep / FileDownloader.java
1 /*
2  * Copyright (C) The Arvados Authors. All rights reserved.
3  *
4  * SPDX-License-Identifier: AGPL-3.0 OR Apache-2.0
5  *
6  */
7
8 package org.arvados.client.logic.keep;
9
10 import com.google.common.collect.Lists;
11 import org.arvados.client.api.client.CollectionsApiClient;
12 import org.arvados.client.api.client.KeepWebApiClient;
13 import org.arvados.client.api.model.Collection;
14 import org.arvados.client.common.Characters;
15 import org.arvados.client.exception.ArvadosClientException;
16 import org.arvados.client.logic.collection.FileToken;
17 import org.arvados.client.logic.collection.ManifestDecoder;
18 import org.arvados.client.logic.collection.ManifestStream;
19 import org.arvados.client.logic.keep.exception.DownloadFolderAlreadyExistsException;
20 import org.arvados.client.logic.keep.exception.FileAlreadyExistsException;
21 import org.slf4j.Logger;
22
23 import java.io.File;
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.concurrent.CompletableFuture;
29 import java.util.stream.Collectors;
30 import java.util.stream.Stream;
31
32 public class FileDownloader {
33
34     private final KeepClient keepClient;
35     private final ManifestDecoder manifestDecoder;
36     private final CollectionsApiClient collectionsApiClient;
37     private final KeepWebApiClient keepWebApiClient;
38     private final Logger log = org.slf4j.LoggerFactory.getLogger(FileDownloader.class);
39
40     public FileDownloader(KeepClient keepClient, ManifestDecoder manifestDecoder, CollectionsApiClient collectionsApiClient, KeepWebApiClient keepWebApiClient) {
41         this.keepClient = keepClient;
42         this.manifestDecoder = manifestDecoder;
43         this.collectionsApiClient = collectionsApiClient;
44         this.keepWebApiClient = keepWebApiClient;
45     }
46
47     public List<FileToken> listFileInfoFromCollection(String collectionUuid) {
48         Collection requestedCollection = collectionsApiClient.get(collectionUuid);
49         String manifestText = requestedCollection.getManifestText();
50
51         // decode manifest text and get list of all FileTokens for this collection
52         return manifestDecoder.decode(manifestText)
53                 .stream()
54                 .flatMap(p -> p.getFileTokens().stream())
55                 .collect(Collectors.toList());
56     }
57
58     public File downloadSingleFileUsingKeepWeb(String filePathName, String collectionUuid, String pathToDownloadFolder) {
59         FileToken fileToken = getFileTokenFromCollection(filePathName, collectionUuid);
60         if (fileToken == null) {
61             throw new ArvadosClientException(String.format("%s not found in Collection with UUID %s", filePathName, collectionUuid));
62         }
63
64         File downloadedFile = checkIfFileExistsInTargetLocation(fileToken, pathToDownloadFolder);
65         try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
66             fos.write(keepWebApiClient.download(collectionUuid, filePathName));
67         } catch (IOException e) {
68             throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
69         }
70         return downloadedFile;
71     }
72
73     public List<File> downloadFilesFromCollectionUsingKeepWeb(String collectionUuid, String pathToDownloadFolder) {
74         String collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder).getAbsolutePath();
75         List<FileToken> fileTokens = listFileInfoFromCollection(collectionUuid);
76
77         List<CompletableFuture<File>> futures = Lists.newArrayList();
78         for (FileToken fileToken : fileTokens) {
79             futures.add(CompletableFuture.supplyAsync(() -> this.downloadOneFileFromCollectionUsingKeepWeb(fileToken, collectionUuid, collectionTargetDir)));
80         }
81
82         @SuppressWarnings("unchecked")
83         CompletableFuture<File>[] array = futures.toArray(new CompletableFuture[0]);
84         return Stream.of(array)
85                 .map(CompletableFuture::join).collect(Collectors.toList());
86     }
87
88     private FileToken getFileTokenFromCollection(String filePathName, String collectionUuid) {
89         return listFileInfoFromCollection(collectionUuid)
90                 .stream()
91                 .filter(p -> (p.getFullPath()).equals(filePathName))
92                 .findFirst()
93                 .orElse(null);
94     }
95
96     private File checkIfFileExistsInTargetLocation(FileToken fileToken, String pathToDownloadFolder) {
97         String fileName = fileToken.getFileName();
98
99         File downloadFile = new File(pathToDownloadFolder + Characters.SLASH + fileName);
100         if (downloadFile.exists()) {
101             throw new FileAlreadyExistsException(String.format("File %s exists in location %s", fileName, pathToDownloadFolder));
102         } else {
103             return downloadFile;
104         }
105     }
106
107     private File downloadOneFileFromCollectionUsingKeepWeb(FileToken fileToken, String collectionUuid, String pathToDownloadFolder) {
108         String filePathName = fileToken.getPath() + fileToken.getFileName();
109         File downloadedFile = new File(pathToDownloadFolder + Characters.SLASH + filePathName);
110         downloadedFile.getParentFile().mkdirs();
111
112         try (FileOutputStream fos = new FileOutputStream(downloadedFile)) {
113             fos.write(keepWebApiClient.download(collectionUuid, filePathName));
114         } catch (IOException e) {
115             throw new RuntimeException(e);
116         }
117         return downloadedFile;
118     }
119
120     public List<File> downloadFilesFromCollection(String collectionUuid, String pathToDownloadFolder) {
121
122         // download requested collection and extract manifest text
123         Collection requestedCollection = collectionsApiClient.get(collectionUuid);
124         String manifestText = requestedCollection.getManifestText();
125
126         // if directory with this collectionUUID does not exist - create one
127         // if exists - abort (throw exception)
128         File collectionTargetDir = setTargetDirectory(collectionUuid, pathToDownloadFolder);
129
130         // decode manifest text and create list of ManifestStream objects containing KeepLocators and FileTokens
131         List<ManifestStream> manifestStreams = manifestDecoder.decode(manifestText);
132
133         //list of all downloaded files that will be returned by this method
134         List<File> downloadedFilesFromCollection = new ArrayList<>();
135
136         // download files for each manifest stream
137         for (ManifestStream manifestStream : manifestStreams)
138             downloadedFilesFromCollection.addAll(downloadFilesFromSingleManifestStream(manifestStream, collectionTargetDir));
139
140         log.debug(String.format("Total of: %d files downloaded", downloadedFilesFromCollection.size()));
141         return downloadedFilesFromCollection;
142     }
143
144     private File setTargetDirectory(String collectionUUID, String pathToDownloadFolder) {
145         //local directory to save downloaded files
146         File collectionTargetDir = new File(pathToDownloadFolder + Characters.SLASH + collectionUUID);
147         if (collectionTargetDir.exists()) {
148             throw new DownloadFolderAlreadyExistsException(String.format("Directory for collection UUID %s already exists", collectionUUID));
149         } else {
150             collectionTargetDir.mkdirs();
151         }
152         return collectionTargetDir;
153     }
154
155     private List<File> downloadFilesFromSingleManifestStream(ManifestStream manifestStream, File collectionTargetDir){
156         List<File> downloadedFiles = new ArrayList<>();
157         List<KeepLocator> keepLocators = manifestStream.getKeepLocators();
158         DownloadHelper downloadHelper = new DownloadHelper(keepLocators);
159
160         for (FileToken fileToken : manifestStream.getFileTokens()) {
161             File downloadedFile = new File(collectionTargetDir.getAbsolutePath() + Characters.SLASH + fileToken.getFullPath()); //create file
162             downloadedFile.getParentFile().mkdirs();
163
164             try (FileOutputStream fos = new FileOutputStream(downloadedFile, true)) {
165                 downloadHelper.setBytesToDownload(fileToken.getFileSize()); //update file size info
166
167                 //this part needs to be repeated for each file until whole file is downloaded
168                 do {
169                     downloadHelper.requestNewDataChunk(); //check if new data chunk needs to be downloaded
170                     downloadHelper.writeDownFile(fos); // download data from chunk
171                 } while (downloadHelper.getBytesToDownload() != 0);
172
173             } catch (IOException | ArvadosClientException e) {
174                 throw new ArvadosClientException(String.format("Unable to write down file %s", fileToken.getFileName()), e);
175             }
176
177             downloadedFiles.add(downloadedFile);
178             log.debug(String.format("File %d / %d downloaded from manifest stream",
179                     manifestStream.getFileTokens().indexOf(fileToken) + 1,
180                     manifestStream.getFileTokens().size()));
181         }
182         return downloadedFiles;
183     }
184
185     private class DownloadHelper {
186
187         // values for tracking file output streams and matching data chunks with initial files
188         int currentDataChunkNumber;
189         int bytesDownloadedFromChunk;
190         int bytesToDownload;
191         byte[] currentDataChunk;
192         boolean remainingDataInChunk;
193         final List<KeepLocator> keepLocators;
194
195         private DownloadHelper(List<KeepLocator> keepLocators) {
196             currentDataChunkNumber = -1;
197             bytesDownloadedFromChunk = 0;
198             remainingDataInChunk = false;
199             this.keepLocators = keepLocators;
200         }
201
202         private int getBytesToDownload() {
203             return bytesToDownload;
204         }
205
206         private void setBytesToDownload(int bytesToDownload) {
207             this.bytesToDownload = bytesToDownload;
208         }
209
210         private void requestNewDataChunk() {
211             if (!remainingDataInChunk) {
212                 currentDataChunkNumber++;
213                 if (currentDataChunkNumber < keepLocators.size()) {
214                     //swap data chunk for next one
215                     currentDataChunk = keepClient.getDataChunk(keepLocators.get(currentDataChunkNumber));
216                     log.debug(String.format("%d of %d data chunks from manifest stream downloaded", currentDataChunkNumber + 1, keepLocators.size()));
217                 } else {
218                     throw new ArvadosClientException("Data chunk required for download is missing.");
219                 }
220             }
221         }
222
223         private void writeDownFile(FileOutputStream fos) throws IOException {
224             //case 1: more bytes needed than available in current chunk (or whole current chunk needed) to download file
225             if (bytesToDownload >= currentDataChunk.length - bytesDownloadedFromChunk) {
226                 writeDownWholeDataChunk(fos);
227             }
228             //case 2: current data chunk contains more bytes than is needed for this file
229             else {
230                 writeDownDataChunkPartially(fos);
231             }
232         }
233
234         private void writeDownWholeDataChunk(FileOutputStream fos) throws IOException {
235             // write all remaining bytes from current chunk
236             fos.write(currentDataChunk, bytesDownloadedFromChunk, currentDataChunk.length - bytesDownloadedFromChunk);
237             //update bytesToDownload
238             bytesToDownload -= (currentDataChunk.length - bytesDownloadedFromChunk);
239             // set remaining data in chunk to false
240             remainingDataInChunk = false;
241             //reset bytesDownloadedFromChunk so that its set to 0 for the next chunk
242             bytesDownloadedFromChunk = 0;
243         }
244
245         private void writeDownDataChunkPartially(FileOutputStream fos) throws IOException {
246             //write all remaining bytes for this file from current chunk
247             fos.write(currentDataChunk, bytesDownloadedFromChunk, bytesToDownload);
248             // update number of bytes downloaded from this chunk
249             bytesDownloadedFromChunk += bytesToDownload;
250             // set remaining data in chunk to true
251             remainingDataInChunk = true;
252             // reset bytesToDownload to exit while loop and move to the next file
253             bytesToDownload = 0;
254         }
255     }
256 }