# Job setup: obtain the handle returned by arvados.api('v1'), open the input
# collection named by the 'reads' job parameter, and compile the filename
# pattern used to recognise the first file of a read pair.
7 api = arvados.api('v1')
12 # Look for paired reads
14 inp = arvados.CollectionReader(arvados.getjobparam('reads'))
# group(1) is the (non-greedy) prefix before "_1"; group(2) is the optional
# three-character suffix written as "(.gz)".
# NOTE(review): the "." characters are unescaped, so each one matches any
# character (e.g. "_1xfastq" also matches) -- confirm whether "\." was meant.
16 prog = re.compile(r'(.*?)_1.fastq(.gz)?$')
# readline(reader, start): helper that inspects the data of `reader`
# beginning at offset `start`.  Only part of the body is visible here; what it
# returns cannot be confirmed from these lines.
20 def readline(reader, start):
# Ask the reader for up to 1024 units starting at `start`.
24 r = reader.readfrom(start, 1024)
# Index of the first newline in the chunk, or -1 when the chunk has none
# (Python 2 `string.find` semantics).
27 n = string.find(r, "\n")
# Core splitting logic over the list `p`; each p[i] is a dict with at least the
# keys "reader", "start" and "end".  The enclosing definition and several
# statements are not visible here, so loop bodies below may be incomplete.
33 for i in xrange(0, len(p)):
40 # read 4 lines starting at "start"
# For each of 4 lines, and for each entry of p, call readline() and add the
# length of what it returns to that entry's running recordsize.
# NOTE(review): as shown, every call passes the same p[i]["start"]; whether the
# offset is advanced between calls is not visible -- confirm.
41 for ln in xrange(0, 4):
42 for i in xrange(0, len(p)):
43 r = readline(p[i]["reader"], p[i]["start"])
46 recordsize[i] += len(r)
# True when the span accumulated so far (end - start) plus the record just
# measured reaches arvados.BLOCKSIZE.
# NOTE(review): here arvados.BLOCKSIZE is treated as a size threshold, while a
# bare BLOCKSIZE is used further down as an index into `seg` -- confirm these
# are not the same constant.
49 for i in xrange(0, len(p)):
50 if ((p[i]["end"] - p[i]["start"]) + recordsize[i]) >= arvados.BLOCKSIZE:
# Emit one manifest line per entry of p for the current piece, then move that
# entry's "start" up to its "end".
54 for i in xrange(0, len(p)):
# NOTE(review): `manifest` has .extend() called on it, so it is presumably a
# list; extend() with a string adds one element per character -- append() looks
# intended.  Confirm.
58 manifest.extend("./_" + str(piece))
# NOTE(review): `p` is indexed by integer everywhere else; `p["reader"]` here
# is missing the `[i]`.
59 manifest.extend([d[LOCATOR] for d in p["reader"]._stream._data_locators])
# Formats each segment returned by arvados.locators_and_ranges() for the
# current [start, end) span as "<seg[LOCATOR]>:<seg[BLOCKSIZE]>:<name>", with
# spaces in the name replaced by the literal text \040.
# NOTE(review): `self` is not bound in any visible code; p[i]["reader"].name()
# may have been intended -- confirm.
60 manifest.extend(["{}:{}:{}".format(seg[LOCATOR], seg[BLOCKSIZE], self.name().replace(' ', '\\040')) for seg in arvados.locators_and_ranges(p[i]["reader"].segments, p[i]["start"], p[i]["end"] - p[i]["start"])])
# NOTE(review): a list has no .join(); if `manifest` is a list the working form
# is " ".join(manifest).
61 manifest_text += manifest.join(" ") + "\n"
62 p[i]["start"] = p[i]["end"]
# Grow every entry's span by the size of the record that was just measured.
64 for i in xrange(0, len(p)):
65 p[i]["end"] += recordsize[i]
# Walk every file of every stream in the input collection and test its name
# against the "_1.fastq" pattern in `prog`.  Statements between the visible
# lines (including any check that `result` is not None) are not shown here.
68 for s in inp.all_streams():
70 for f in s.all_files():
71 result = prog.match(f.name())
# group(0) is the whole matched text, used as the key into s.files() for the
# first read file.
74 p[0]["reader"] = s.files()[result.group(0)]
# The second read file's name is the same prefix with "_2.fastq", plus the
# optional suffix captured in group(2) when one was matched.
75 if result.group(2) != None:
76 p[1]["reader"] = s.files()[result.group(1) + "_2.fastq" + result.group(2)]
# NOTE(review): the closing "]" of the subscript is missing on the next line
# as shown.
78 p[1]["reader"] = s.files()[result.group(1) + "_2.fastq"
80 #m0 = p[0]["reader"].as_manifest()[1:]
81 #m1 = p[1]["reader"].as_manifest()[1:]
82 #manifest_text += "./_" + str(piece) + m0
83 #manifest_text += "./_" + str(piece) + m1
86 # No pairs found so just put each fastq file into a separate directory
# Runs only when the paired-read pass left manifest_text empty.
87 if manifest_text == "":
88 for s in inp.all_streams():
# NOTE(review): this rebinds the module-level `prog` and recompiles the same
# pattern once per stream; the pattern is not a raw string and its "."
# characters are unescaped (each matches any character) -- confirm intent.
89 prog = re.compile("(.*?).fastq(.gz)?$")
91 for f in s.all_files():
92 result = prog.match(f.name())
# group(0) is the whole matched text, used as the key into s.files().
95 p[0]["reader"] = s.files()[result.group(0)]
97 #m0 = p[0]["reader"].as_manifest()[1:]
98 #manifest_text += "./_" + str(piece) + m0
# Hand the accumulated manifest text to the current task as its output.
101 arvados.current_task().set_output(manifest_text)