X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6f231c2537c196644f15eb7cac6f6861ea24e429..47eb67e4c084abde49d5463d4ced8b4436a59dfd:/crunch_scripts/decompress-all.py diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py index 460425f8b2..100ea12239 100755 --- a/crunch_scripts/decompress-all.py +++ b/crunch_scripts/decompress-all.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 # # decompress-all.py @@ -18,6 +21,7 @@ import re import subprocess import os import sys +import crunchutil.robust_put as robust_put arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True) @@ -32,7 +36,7 @@ outdir = os.path.join(task.tmpdir, "output") os.makedirs(outdir) os.chdir(outdir) -if infile_parts == None: +if infile_parts is None: print >>sys.stderr, "Failed to parse input filename '%s' as a Keep file\n" % input_file sys.exit(1) @@ -40,17 +44,21 @@ cr = arvados.CollectionReader(infile_parts.group(1)) streamname = infile_parts.group(3)[1:] filename = infile_parts.group(4)[1:] -if streamname != None: +if streamname is not None: subprocess.call(["mkdir", "-p", streamname]) os.chdir(streamname) else: streamname = '.' -streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0] -filereader = streamreader.files()[filename] -rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')]) -if rc == 0: - out = arvados.CollectionWriter() - out.write_directory_tree(outdir, max_manifest_depth=0) - task.set_output(out.finish()) + +m = re.match(r'.*\.(gz|Z|bz2|tgz|tbz|zip|rar|7z|cab|deb|rpm|cpio|gem)$', arvados.get_task_param_mount('input'), re.IGNORECASE) + +if m is not None: + rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')]) + if rc == 0: + task.set_output(robust_put.upload(outdir)) + else: + sys.exit(rc) else: + streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0] + filereader = streamreader.files()[filename] task.set_output(streamname + filereader.as_manifest()[1:])