X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b1365ce7c1ccc74f479b1ebdf31b5da52028da84..08181a95191f00f2069758625512ada2788646e2:/crunch_scripts/split-fastq.py?ds=sidebyside diff --git a/crunch_scripts/split-fastq.py b/crunch_scripts/split-fastq.py index 8382eb13f9..17aabf2930 100755 --- a/crunch_scripts/split-fastq.py +++ b/crunch_scripts/split-fastq.py @@ -88,42 +88,42 @@ def splitfastq(p): if count % 10000 == 0: print >>sys.stderr, "Record %s at %s" % (count, p[i]["end"]) -prog = re.compile(r'(.*?)(_12)?\.fastq(\.gz)?$') +prog = re.compile(r'(.*?)(_[12])?\.fastq(\.gz)?$') # Look for fastq files for s in inp.all_streams(): for f in s.all_files(): name_pieces = prog.match(f.name()) - if name_pieces != None: + if name_pieces is not None: if s.name() != ".": # The downstream tool (run-command) only iterates over the top # level of directories so if there are fastq files in # directories in the input, the choice is either to forget # there are directories (which might lead to name conflicts) or # just fail. - print >>sys.stderr, "fastq must be at the root of the collection") + print >>sys.stderr, "fastq must be at the root of the collection" sys.exit(1) - if name_pieces.group(2) != None: + p = None + if name_pieces.group(2) is not None: if name_pieces.group(2) == "_1": p = [{}, {}] p[0]["reader"] = s.files()[name_pieces.group(0)] - if name_pieces.group(2) != None: - p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + name_pieces.group(2)] - else: - p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq"] + p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + (name_pieces.group(3) if name_pieces.group(3) else '')] else: p = [{}] p[0]["reader"] = s.files()[name_pieces.group(0)] - if chunking: - splitfastq(p) - else: - for i in xrange(0, len(p)): - m = p[i]["reader"].as_manifest()[1:] - manifest_list.append(["./_" + str(piece), m[:-1]]) - piece += 1 - -manifest_text = "\n".join(" ".join(m) for m in manifest_list) + if p is not None: + if chunking: + splitfastq(p) + else: + for i in xrange(0, len(p)): + m = p[i]["reader"].as_manifest().split() + m[0] = "./_" + str(piece) + manifest_list.append(m) + piece += 1 + +manifest_text = "\n".join(" ".join(m) for m in manifest_list) + "\n" arvados.current_task().set_output(manifest_text)