# Module-level setup: obtain an API handle, open the input collection named by
# the 'reads' job parameter, and compile the pattern used to spot the first
# file of a read pair.
7 api = arvados.api('v1')
12 # Look for paired reads
# 'reads' is fetched via arvados.getjobparam and wrapped in a CollectionReader.
14 inp = arvados.CollectionReader(arvados.getjobparam('reads'))
# group(1) = name prefix before "_1", group(2) = optional "gz" suffix
# (None when the suffix is absent).
# NOTE(review): the "." characters are unescaped, so they match any character,
# not only a literal dot -- confirm whether r"\." was intended.
16 prog = re.compile("(.*?)_1.fastq(.gz)?$")
# readline(reader, start): fetch data from `reader` at offset `start` and look
# for the first newline in it.  Only part of the body is visible here, so the
# return value is not documented.
20 def readline(reader, start):
# Presumably reads up to 1024 bytes beginning at `start` -- confirm against
# the reader's readfrom() contract.
24 r = reader.readfrom(start, 1024)
# string.find returns the lowest index of "\n" in r, or -1 when absent.
# NOTE(review): the module-level string.find function exists only in
# Python 2; r.find("\n") is the portable spelling.
27 n = string.find(r, "\n")
# Interior of the record-splitting routine (its header and several lines are
# not visible here).  `p` is indexed by integer and each entry is a dict with
# "reader", "start" and "end" keys; `recordsize` holds one running size per
# entry of `p`.
33 for i in xrange(0, len(p)):
40 # read 4 lines starting at "start"
41 for ln in xrange(0, 4):
42 for i in xrange(0, len(p)):
# NOTE(review): every one of the four iterations passes the same
# p[i]["start"] offset; unless a line not shown advances it, the same line
# is read four times -- confirm.
43 r = readline(p[i]["reader"], p[i]["start"])
# Accumulate the size of the record just read for entry i.
46 recordsize[i] += len(r)
49 for i in xrange(0, len(p)):
# True once the current span (end - start) plus the pending record reaches
# arvados.BLOCKSIZE (presumably the storage block size -- TODO confirm).
50 if ((p[i]["end"] - p[i]["start"]) + recordsize[i]) >= arvados.BLOCKSIZE:
54 for i in xrange(0, len(p)):
# NOTE(review): list.extend() with a str argument adds one element per
# character; append(...) or extend([...]) looks intended for the stream name.
58 manifest.extend("./_" + str(piece))
# NOTE(review): `p` is indexed as p[i]["reader"] on every other line; if p is
# a list, p["reader"] raises TypeError.  LOCATOR is also referenced without a
# module prefix, unlike arvados.BLOCKSIZE above -- verify it is in scope.
59 manifest.extend([d[LOCATOR] for d in p["reader"]._stream._data_locators])
# Builds one "locator:size:filename" token per segment covering
# [start, end) of the file, escaping spaces in the name as \040.
# NOTE(review): `self` is not defined by any visible line (this is not
# visibly a method); p[i]["reader"].name() may have been intended.
# seg[BLOCKSIZE] is unqualified and looks like the wrong index for a segment
# size -- confirm.
60 manifest.extend(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], self.name().replace(' ', '\\040')) for seg in arvados.locators_and_ranges(p[i]["reader"].segments, p[i]["start"], p[i]["end"] - p[i]["start"])])
# NOTE(review): `manifest` is used with .extend() above, so it appears to be
# a list, and lists have no join() method; " ".join(manifest) looks intended.
61 manifest_text += manifest.join(" ") + "\n"
# The next piece begins where this one ended.
62 p[i]["start"] = p[i]["end"]
64 for i in xrange(0, len(p)):
# Grow the current span of entry i by the record just measured.
65 p[i]["end"] += recordsize[i]
# Walk every file of every stream in the input collection and test its name
# against the "_1.fastq" pattern to find the first file of a read pair.
68 for s in inp.all_streams():
70 for f in s.all_files():
71 result = prog.match(f.name())
# group(0) is the whole matched file name, i.e. the "_1" file itself.
74 p[0]["reader"] = s.files()[result.group(0)]
# The mate is looked up under the same prefix with "_2.fastq" substituted.
# NOTE(review): result.group(2) is None when the name has no "gz" suffix, so
# this concatenation raises TypeError for plain ".fastq" files unless that is
# handled in lines not shown; (result.group(2) or "") would avoid it.
75 p[1]["reader"] = s.files()[result.group(1) + "_2.fastq" + result.group(2)]
77 #m0 = p[0]["reader"].as_manifest()[1:]
78 #m1 = p[1]["reader"].as_manifest()[1:]
79 #manifest_text += "./_" + str(piece) + m0
80 #manifest_text += "./_" + str(piece) + m1
83 # No pairs found so just put each fastq file into a separate directory
# An empty manifest_text is used as the signal that nothing was emitted so far.
84 if manifest_text == "":
85 for s in inp.all_streams():
# NOTE(review): the pattern does not depend on `s`, so it could be compiled
# once before the loop.  The "." before "fastq" is unescaped and matches any
# character -- confirm whether a literal dot was intended.
86 prog = re.compile("(.*?).fastq(.gz)?$")
88 for f in s.all_files():
89 result = prog.match(f.name())
# group(0) is the whole matched file name.
92 p[0]["reader"] = s.files()[result.group(0)]
94 #m0 = p[0]["reader"].as_manifest()[1:]
95 #manifest_text += "./_" + str(piece) + m0
# Record the accumulated manifest text as the output of the current task.
98 arvados.current_task().set_output(manifest_text)