X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/db4c2a1cbf4d526a16e59b58fbac81a703dee87b..1de8e55b47ea46fe1e589fbfe1ff0ae77b9e2cbf:/crunch_scripts/split-fastq.py diff --git a/crunch_scripts/split-fastq.py b/crunch_scripts/split-fastq.py index 1d81393dbb..17aabf2930 100755 --- a/crunch_scripts/split-fastq.py +++ b/crunch_scripts/split-fastq.py @@ -88,13 +88,13 @@ def splitfastq(p): if count % 10000 == 0: print >>sys.stderr, "Record %s at %s" % (count, p[i]["end"]) -prog = re.compile(r'(.*?)(_12)?\.fastq(\.gz)?$') +prog = re.compile(r'(.*?)(_[12])?\.fastq(\.gz)?$') # Look for fastq files for s in inp.all_streams(): for f in s.all_files(): name_pieces = prog.match(f.name()) - if name_pieces != None: + if name_pieces is not None: if s.name() != ".": # The downstream tool (run-command) only iterates over the top # level of directories so if there are fastq files in @@ -104,26 +104,26 @@ for s in inp.all_streams(): print >>sys.stderr, "fastq must be at the root of the collection" sys.exit(1) - if name_pieces.group(2) != None: + p = None + if name_pieces.group(2) is not None: if name_pieces.group(2) == "_1": p = [{}, {}] p[0]["reader"] = s.files()[name_pieces.group(0)] - if name_pieces.group(2) != None: - p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + name_pieces.group(2)] - else: - p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq"] + p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + (name_pieces.group(3) if name_pieces.group(3) else '')] else: p = [{}] p[0]["reader"] = s.files()[name_pieces.group(0)] - if chunking: - splitfastq(p) - else: - for i in xrange(0, len(p)): - m = p[i]["reader"].as_manifest()[1:] - manifest_list.append(["./_" + str(piece), m[:-1]]) - piece += 1 - -manifest_text = "\n".join(" ".join(m) for m in manifest_list) + if p is not None: + if chunking: + splitfastq(p) + else: + for i in xrange(0, len(p)): + m = p[i]["reader"].as_manifest().split() + m[0] = "./_" + str(piece) + manifest_list.append(m) + piece += 1 + +manifest_text = "\n".join(" ".join(m) for m in manifest_list) + "\n" arvados.current_task().set_output(manifest_text)