1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
5 #' R6 Class Representing Arvados Collection
8 #' Collection class provides interface for working with Arvados collections,
9 #' for example, actions like creating, updating, moving or removing are possible.
12 #' \url{https://github.com/arvados/arvados/tree/main/sdk/R}
16 Collection <- R6::R6Class(
22 #' @field uuid UUID of the Arvados collection.
26 #' Initialize a new Collection object.
27 #' @param api Arvados API object.
28 #' @param uuid UUID of the Arvados collection.
29 #' @return A new `Collection` object.
31 #' collection <- Collection$new(arv, CollectionUUID)
32 initialize = function(api, uuid)
# Constructor: asks the supplied API object for its REST service and caches it
# in the private REST slot used by the other methods.
# NOTE(review): no statement storing `uuid` is visible in this listing, yet
# other methods read self$uuid - confirm the assignment exists in the full file.
34 private$REST <- api$getRESTService()
39 #' Adds ArvadosFile or Subcollection specified by content to the collection. Used only with ArvadosFile or Subcollection.
40 #' @param content Content to be added.
41 #' @param relativePath Path to add content.
42 add = function(content, relativePath = "")
# Attaches an existing ArvadosFile/Subcollection object to this collection
# under `relativePath`.
# NOTE(review): this listing appears truncated (brace-only and other short
# lines are absent); comments describe only the statements shown.
# Build the tree on first use.
44 if(is.null(private$tree))
45 private$generateCollectionTreeStructure()
# Root-like paths resolve to the tree root; any other path has its trailing
# "/" trimmed and is looked up with self$get().
47 if(relativePath == "" ||
48 relativePath == "." ||
51 subcollection <- private$tree$getTree()
55 relativePath <- trimFromEnd(relativePath, "/")
56 subcollection <- self$get(relativePath)
# self$get() yields NULL for a path that is not in the tree.
59 if(is.null(subcollection))
60 stop(paste("Subcollection", relativePath, "doesn't exist."))
# Only ArvadosFile / Subcollection instances are accepted.
62 if("ArvadosFile" %in% class(content) ||
63 "Subcollection" %in% class(content))
# Content that already reports an owning collection is rejected.
65 if(!is.null(content$getCollection()))
66 stop("Content already belongs to a collection.")
# An empty name is rejected.
68 if(content$getName() == "")
69 stop("Content has invalid name.")
# Delegate the actual attachment to the target subcollection.
71 subcollection$add(content)
# Error raised for any other class; the message lists the classes of `content`.
# NOTE(review): "AravodsFile" in the message looks like a typo for "ArvadosFile".
76 stop(paste0("Expected AravodsFile or Subcollection object, got ",
77 paste0("(", paste0(class(content), collapse = ", "), ")"),
84 #' @param file Name of the file.
85 #' @param con Collection from which the file is read.
86 #' @param sep Separator used in reading tsv, csv file format.
87 #' @param istable Used in reading txt file to check if the file is table or not.
88 #' @param fileclass Used in reading fasta file to set file class.
89 #' @param Ncol Used in reading binary file to set numbers of columns in data.frame.
90 #' @param Nrow Used in reading binary file to set numbers of rows in data.frame size.
92 #' collection <- Collection$new(arv, collectionUUID)
93 #' readFile <- collection$readArvFile(arvadosFile, istable = 'yes') # table
94 #' readFile <- collection$readArvFile(arvadosFile, istable = 'no') # text
95 #' readFile <- collection$readArvFile(arvadosFile) # xlsx, csv, tsv, rds, rdata
96 #' readFile <- collection$readArvFile(arvadosFile, fileclass = 'fasta') # fasta
97 #' readFile <- collection$readArvFile(arvadosFile, Ncol= 4, Nrow = 32) # binary, only numbers
98 #' readFile <- collection$readArvFile(arvadosFile, Ncol = 5, Nrow = 150, istable = "factor") # binary with factor or text
99 readArvFile = function(file, con, sep = ',', istable = NULL, fileclass = "SeqFastadna", Ncol = NULL, Nrow = NULL, wantedFunction = NULL)
# Reads one file of this collection, choosing the reader from the lower-cased
# file extension (txt, xlsx, csv/tsv, fasta, dat/bin, rds/rdata).
# NOTE(review): this listing appears truncated (brace-only and other short
# lines are absent); comments describe only the statements shown.
# NOTE(review): `con`, `fileclass` and `wantedFunction` are not referenced in
# any line shown here - confirm whether they are still needed.
# Resolve the file object and take everything after the last "." of its
# lower-cased name as the format.
101 arvFile <- self$get(file)
102 FileName <- arvFile$getName()
103 FileName <- tolower(FileName)
104 FileFormat <- gsub(".*\\.", "", FileName)
# Credentials for the S3-style access below come from the process environment;
# "/" in the token is replaced by "_" to form the key.
107 ARVADOS_API_TOKEN <- Sys.getenv("ARVADOS_API_TOKEN")
108 ARVADOS_API_HOST <- Sys.getenv("ARVADOS_API_HOST")
109 my_collection <- self$uuid
110 key <- gsub("/", "_", ARVADOS_API_TOKEN)
# Named arguments of a call whose opening line is not shown (presumably
# Sys.setenv - TODO confirm). The endpoint is the API host with "api." removed.
113 "AWS_ACCESS_KEY_ID" = key,
114 "AWS_SECRET_ACCESS_KEY" = key,
115 "AWS_DEFAULT_REGION" = "collections",
116 "AWS_S3_ENDPOINT" = gsub("api[.]", "", ARVADOS_API_HOST))
118 if (FileFormat == "txt") {
# txt needs `istable`: 'no' reads the text and replaces CR/LF with spaces,
# 'yes' parses the connection with read.table().
119 if (is.null(istable)){
120 stop(paste('You need to paste whether it is a text or table file'))
121 } else if (istable == 'no') {
122 fileContent <- arvFile$read("text") # used to read
123 fileContent <- gsub("[\r\n]", " ", fileContent)
124 } else if (istable == 'yes') {
125 arvConnection <- arvFile$connection("r") # used to make possible use different function later
126 fileContent <- read.table(arvConnection)
129 else if (FileFormat == "xlsx") {
# xlsx is fetched through aws.s3 using the collection UUID as the bucket.
130 fileContent <- aws.s3::s3read_using(FUN = openxlsx::read.xlsx, object = file, bucket = my_collection)
132 else if (FileFormat == "csv" || FileFormat == "tsv") {
# Delimited text is read from a connection; `sep` only changes the result
# when a csv is requested with a tab separator.
# NOTE(review): the result is stored in `mytable`, not `fileContent` as in
# the txt/xlsx branches.
133 arvConnection <- arvFile$connection("r")
134 if (FileFormat == "tsv"){
135 mytable <- read.table(arvConnection, sep = '\t')
# NOTE(review): `&` is the element-wise operator; a scalar `if` condition
# would normally use `&&`.
136 } else if (FileFormat == "csv" & sep == '\t') {
137 mytable <- read.table(arvConnection, sep = '\t')
138 } else if (FileFormat == "csv") {
139 mytable <- read.table(arvConnection, sep = ',')
141 stop(paste('File format not supported, use arvadosFile$connection() and customise it'))
144 else if (FileFormat == "fasta") {
# fasta is fetched through aws.s3 and parsed by seqinr with as.string = TRUE.
145 fastafile <- aws.s3::s3read_using(FUN = seqinr::read.fasta, as.string = TRUE, object = file, bucket = my_collection)
147 else if (FileFormat == "dat" || FileFormat == "bin") {
# Binary input is read from a gzcon()-wrapped binary connection.
148 fileContent <- gzcon(arvFile$connection("rb"))
150 # function to process binary data into a data.frame
151 read_bin.file <- function(fileContent) {
# Reads Ncol character values (used as column names below), then up to
# Nrow*Ncol+Ncol numeric values.
153 column.names <- readBin(fileContent, character(), n = Ncol)
154 bindata <- readBin(fileContent, numeric(), Nrow*Ncol+Ncol)
# Indices of values below 1e-7 are dropped from the numeric data.
# NOTE(review): if `res` is empty, `bindata[-res]` selects nothing; the
# surrounding lines are not shown, so confirm a guard exists.
156 res <- which(bindata < 0.0000001)
158 bindata <- bindata[-res]
# Fill an Nrow x Ncol data.frame with consecutive Nrow-long slices of the
# data (the loop header over `i` is not shown in this listing).
163 data <- data.frame(matrix(data = NA, nrow = Nrow, ncol = Ncol))
165 data[,i] <- bindata[(1+Nrow*(i-1)):(Nrow*i)]
167 colnames(data) = column.names
# NA values left in the last column are treated as a failed numeric read.
169 len <- which(is.na(data[,Ncol])) # error if sth went wrong
170 if (length(len) == 0) {
173 stop(paste("there is a factor or text in the table, customize the function by typing more arguments"))
# Both dimensions are required for binary input.
# NOTE(review): `|` is element-wise; `||` is the usual scalar form.
176 if (is.null(Nrow) | is.null(Ncol)){
177 stop(paste('You need to specify numbers of columns and rows'))
# Both visible branches call the same helper.
179 if (is.null(istable)) {
180 fileContent <- read_bin.file(fileContent) # call a function
181 } else if (istable == "factor") { # if there is a table with col name
182 fileContent <- read_bin.file(fileContent)
185 else if (FileFormat == "rds" || FileFormat == "rdata") {
# NOTE(review): readRDS() reads files written by saveRDS(); files written
# by save() (the usual .RData) normally need load() - confirm.
186 arvConnection <- arvFile$connection("rb")
187 mytable <- readRDS(gzcon(arvConnection))
# NOTE(review): parse() treats its first argument as a file/connection, so
# this looks like it fails inside parse() instead of raising the intended
# message; paste() or a plain string was probably meant.
190 stop(parse(('File format not supported, use arvadosFile$connection() and customise it')))
195 #' Write file content
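196 #' @param name Name of the object to create in the collection.
197 #' @param file R object whose content is written.
#' @param collectionUUID UUID of the collection (used as the S3 bucket) to write to.
#' @param fileFormat One of "txt", "csv", "tsv", "fasta", "xlsx", "dat" or "bin".
198 #' @param istable Used in writing txt file: "yes" writes a table, "no" writes text.
#' @param seqName Sequence name(s) passed to the fasta writer.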
200 #' collection <- Collection$new(arv, collectionUUID)
201 #' writeFile <- collection$writeFile(name = "myoutput.csv", file = file, fileFormat = "csv", istable = NULL, collectionUUID = collectionUUID) # csv
202 #' writeFile <- collection$writeFile(name = "myoutput.tsv", file = file, fileFormat = "tsv", istable = NULL, collectionUUID = collectionUUID) # tsv
203 #' writeFile <- collection$writeFile(name = "myoutput.fasta", file = file, fileFormat = "fasta", istable = NULL, collectionUUID = collectionUUID) # fasta
204 #' writeFile <- collection$writeFile(name = "myoutputtable.txt", file = file, fileFormat = "txt", istable = "yes", collectionUUID = collectionUUID) # txt table
205 #' writeFile <- collection$writeFile(name = "myoutputtext.txt", file = file, fileFormat = "txt", istable = "no", collectionUUID = collectionUUID) # txt text
206 #' writeFile <- collection$writeFile(name = "myoutputbinary.dat", file = file, fileFormat = "dat", collectionUUID = collectionUUID) # binary
207 #' writeFile <- collection$writeFile(name = "myoutputxlsx.xlsx", file = file, fileFormat = "xlsx", collectionUUID = collectionUUID) # xlsx
writeFile = function(name, file, collectionUUID, fileFormat, istable = NULL, seqName = NULL) {
  # Writes the R object `file` to the object `name` in the bucket
  # `collectionUUID` through aws.s3, choosing the writer from `fileFormat`
  # ("txt", "csv", "tsv", "fasta", "xlsx", "dat"/"bin").
  # Stops with an error when `fileFormat` is not one of those values, or when
  # `fileFormat` is "txt" and `istable` is neither "yes" nor "no".

  # aws.s3 picks its credentials up from environment variables; they are
  # derived from the Arvados token/host and set for the whole R process.
  api_token <- Sys.getenv("ARVADOS_API_TOKEN")
  api_host <- Sys.getenv("ARVADOS_API_HOST")
  key <- gsub("/", "_", api_token)

  Sys.setenv(
    "AWS_ACCESS_KEY_ID" = key,
    "AWS_SECRET_ACCESS_KEY" = key,
    "AWS_DEFAULT_REGION" = "collections",
    "AWS_S3_ENDPOINT" = gsub("api[.]", "", api_host))

  if (fileFormat == "txt") {
    # identical() keeps the default istable = NULL from failing inside `if`
    # ("argument is of length zero") so the intended message below is raised.
    if (identical(istable, "yes")) {
      aws.s3::s3write_using(file, FUN = write.table, object = name, bucket = collectionUUID)
    } else if (identical(istable, "no")) {
      aws.s3::s3write_using(file, FUN = writeChar, object = name, bucket = collectionUUID)
    } else {
      stop(paste("Specify parametr istable"))
    }
  } else if (fileFormat == "csv") {
    aws.s3::s3write_using(file, FUN = write.csv, object = name, bucket = collectionUUID)
  } else if (fileFormat == "tsv") {
    aws.s3::s3write_using(file, FUN = write.table, row.names = FALSE, sep = "\t", object = name, bucket = collectionUUID)
  } else if (fileFormat == "fasta") {
    # `names` is spelled out; the previous `name =` only reached
    # seqinr::write.fasta() through partial argument matching.
    aws.s3::s3write_using(file, FUN = seqinr::write.fasta, names = seqName, object = name, bucket = collectionUUID)
  } else if (fileFormat == "xlsx") {
    aws.s3::s3write_using(file, FUN = openxlsx::write.xlsx, object = name, bucket = collectionUUID)
  } else if (fileFormat == "dat" || fileFormat == "bin") {
    aws.s3::s3write_using(file, FUN = writeBin, object = name, bucket = collectionUUID)
  } else {
    # Raise the message directly: wrapping it in parse() made R try to parse
    # the text as a file name and fail with an unrelated error.
    stop("File format not supported, use arvadosFile$connection() and customise it")
  }
},
247 #' Creates one or more ArvadosFiles and adds them to the collection at specified path.
248 #' @param files Content to be created.
250 #' collection <- arv$collections_create(name = collectionTitle, description = collectionDescription, owner_uuid = collectionOwner, properties = list("ROX37196928443768648" = "ROX37742976443830153"))
251 create = function(files)
# Creates new entries in the collection for each path in the character
# vector `files`.
# NOTE(review): this listing appears truncated (brace-only and other short
# lines are absent); comments describe only the statements shown.
# Build the tree on first use.
253 if(is.null(private$tree))
254 private$generateCollectionTreeStructure()
# Only character input is processed; see the error at the end for other types.
256 if(is.character(files))
# NOTE(review): sapply() is used for its per-element side effects here.
258 sapply(files, function(file)
# Refuse to overwrite an entry that already exists at this path.
260 childWithSameName <- self$get(file)
261 if(!is.null(childWithSameName))
262 stop("Destination already contains file with same name.")
# Create the branch for the new path and attach it under the tree root.
264 newTreeBranch <- private$tree$createBranch(file)
265 private$tree$addBranch(private$tree$getTree(), newTreeBranch)
# Create the file through the REST service, then mark the branch as
# belonging to this collection.
267 private$REST$create(file, self$uuid)
268 newTreeBranch$setCollection(self)
# Error raised for non-character input; the message lists the classes received.
274 stop(paste0("Expected character vector, got ",
275 paste0("(", paste0(class(files), collapse = ", "), ")"),
281 #' Remove one or more files from the collection.
282 #' @param paths Content to be removed.
284 #' collection$remove(fileName.format)
285 remove = function(paths)
# Removes each path in the character vector `paths` from the collection.
# NOTE(review): this listing appears truncated; the conditions guarding the
# two stop() calls inside the loop are not shown.
# Build the tree on first use.
287 if(is.null(private$tree))
288 private$generateCollectionTreeStructure()
# Only character input is processed; see the error at the end for other types.
290 if(is.character(paths))
292 sapply(paths, function(filePath)
# Trim the trailing "/" and look the element up in the tree.
294 filePath <- trimFromEnd(filePath, "/")
295 file <- self$get(filePath)
# Presumably raised when the lookup returned NULL - condition not shown.
298 stop(paste("File", filePath, "doesn't exist."))
300 parent <- file$getParent()
# Presumably raised when the element has no parent (the root) - condition not shown.
303 stop("You can't delete root folder.")
# Removal is delegated to the parent, keyed by the element's name.
305 parent$remove(file$getName())
# Error raised for non-character input; the message lists the classes received.
312 stop(paste0("Expected character vector, got ",
313 paste0("(", paste0(class(paths), collapse = ", "), ")"),
319 #' Moves ArvadosFile or Subcollection to another location in the collection.
320 #' @param content Content to be moved.
321 #' @param destination Path to move content.
323 #' collection$move("fileName.format", path)
move = function(content, destination) {
  # Looks up the element at path `content` (trailing "/" ignored) and asks it
  # to move itself to `destination`; errors when the path is not in the tree.
  tree_missing <- is.null(private$tree)
  if (tree_missing) {
    private$generateCollectionTreeStructure()
  }

  source_path <- trimFromEnd(content, "/")
  element <- self$get(source_path)
  if (is.null(element)) {
    stop("Content you want to move doesn't exist in the collection.")
  }

  # The element performs the move; its result is this method's result.
  element$move(destination)
},
340 #' Copies ArvadosFile or Subcollection to another location in the collection.
341 #' @param content Content to be copied.
342 #' @param destination Path to copy content to.
344 #' copied <- collection$copy("oldName.format", "newName.format")
copy = function(content, destination) {
  # Looks up the element at path `content` (trailing "/" ignored) and asks it
  # to copy itself to `destination`; errors when the path is not in the tree.
  if (is.null(private$tree)) {
    private$generateCollectionTreeStructure()
  }

  original <- self$get(trimFromEnd(content, "/"))

  if (!is.null(original)) {
    # The element performs the copy; its result is this method's result.
    return(original$copy(destination))
  }

  stop("Content you want to copy doesn't exist in the collection.")
},
361 #' Refreshes the environment.
363 #' collection$refresh()
# NOTE(review): the header of this method and the lines around these two
# statements are not shown in this listing.
# When a tree has been built, detach every node from this collection by
# clearing its collection reference recursively.
366 if(!is.null(private$tree))
368 private$tree$getTree()$setCollection(NULL, setRecursively = TRUE)
374 #' Returns collections file content as character vector.
376 #' list <- collection$getFileListing()
getFileListing = function() {
  # Returns the collection's content paths as reported by the REST service,
  # ordered case-insensitively.
  if (is.null(private$tree)) {
    private$generateCollectionTreeStructure()
  }

  listing <- private$REST$getCollectionContent(self$uuid)
  sort_keys <- tolower(listing)
  listing[order(sort_keys)]
},
387 #' If relativePath is valid, returns ArvadosFile or Subcollection specified by relativePath, else returns NULL.
388 #' @param relativePath Path from which the content is taken.
390 #' arvadosFile <- collection$get(fileName)
get = function(relativePath) {
  # Returns the tree element found at `relativePath`. On first use the tree is
  # generated, with `relativePath` forwarded to the generator.
  tree_missing <- is.null(private$tree)
  if (tree_missing) {
    private$generateCollectionTreeStructure(relativePath)
  }

  collection_tree <- private$tree
  collection_tree$getElement(relativePath)
},
getRESTService = function() {
  # Accessor for the REST service object held by this collection.
  private$REST
},
setRESTService = function(newRESTService) {
  # Replaces the REST service object held by this collection.
  private$REST <- newRESTService
}
405 #' tree: CollectionTree for this collection, built by generateCollectionTreeStructure().
generateCollectionTreeStructure = function(relativePath = NULL) {
  # Fetches the collection content through the REST service (optionally
  # limited by `relativePath`), stores it in private$fileContent and builds
  # private$tree from it. Requires both a uuid and a REST service.
  uuid_missing <- is.null(self$uuid)
  if (uuid_missing) {
    stop("Collection uuid is not defined.")
  }

  rest_missing <- is.null(private$REST)
  if (rest_missing) {
    stop("REST service is not defined.")
  }

  listing <- private$REST$getCollectionContent(self$uuid, relativePath)
  private$fileContent <- listing
  private$tree <- CollectionTree$new(private$fileContent, self)
}
427 #' Custom print function for Collection class
429 #' @param x Instance of Collection class
430 #' @param ... Optional arguments.
print.Collection <- function(x, ...) {
  # Prints a fixed type label followed by the collection's uuid, each wrapped
  # in double quotes. `...` is accepted for print() compatibility and unused.
  cat(paste0("Type: ", "\"", "Arvados Collection", "\""), sep = "\n")
  cat(paste0("uuid: ", "\"", x$uuid, "\""), sep = "\n")
  # Print methods hand their argument back invisibly so the call can be used
  # in assignments and chained calls; previously the NULL from cat() was returned.
  invisible(x)
}