X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b5393c72ddd855564cae2f2d798a3e4496a8d2ec..1ad626b28816840288093d94a12ea7694201364b:/crunch_scripts/split-fastq.py

diff --git a/crunch_scripts/split-fastq.py b/crunch_scripts/split-fastq.py
index e242102777..17aabf2930 100755
--- a/crunch_scripts/split-fastq.py
+++ b/crunch_scripts/split-fastq.py
@@ -88,13 +88,13 @@ def splitfastq(p):
             if count % 10000 == 0:
                 print >>sys.stderr, "Record %s at %s" % (count, p[i]["end"])
 
-prog = re.compile(r'(.*?)(_12)?\.fastq(\.gz)?$')
+prog = re.compile(r'(.*?)(_[12])?\.fastq(\.gz)?$')
 
 # Look for fastq files
 for s in inp.all_streams():
     for f in s.all_files():
         name_pieces = prog.match(f.name())
-        if name_pieces != None:
+        if name_pieces is not None:
             if s.name() != ".":
                 # The downstream tool (run-command) only iterates over the top
                 # level of directories so if there are fastq files in
@@ -104,7 +104,8 @@ for s in inp.all_streams():
                 print >>sys.stderr, "fastq must be at the root of the collection"
                 sys.exit(1)
 
-            if name_pieces.group(2) != None:
+            p = None
+            if name_pieces.group(2) is not None:
                 if name_pieces.group(2) == "_1":
                     p = [{}, {}]
                     p[0]["reader"] = s.files()[name_pieces.group(0)]
@@ -113,14 +114,16 @@ for s in inp.all_streams():
                 p = [{}]
                 p[0]["reader"] = s.files()[name_pieces.group(0)]
 
-            if chunking:
-                splitfastq(p)
-            else:
-                for i in xrange(0, len(p)):
-                    m = p[i]["reader"].as_manifest()[1:]
-                    manifest_list.append(["./_" + str(piece), m[:-1]])
-                    piece += 1
-
-manifest_text = "\n".join(" ".join(m) for m in manifest_list)
+            if p is not None:
+                if chunking:
+                    splitfastq(p)
+                else:
+                    for i in xrange(0, len(p)):
+                        m = p[i]["reader"].as_manifest().split()
+                        m[0] = "./_" + str(piece)
+                        manifest_list.append(m)
+                        piece += 1
+
+manifest_text = "\n".join(" ".join(m) for m in manifest_list) + "\n"
 
 arvados.current_task().set_output(manifest_text)