# Task script (excerpt): reads tab-separated lines from the job's input
# collection, looks up or creates trait, human and link records through
# arvados.service, and writes the JSON of the records it touches to an output
# collection file named "arvados_objects.json".
# NOTE(review): the embedded line numbers jump (e.g. 11 -> 13, 14 -> 21), so
# this view omits lines and all indentation. Closing parentheses, `else:`
# branches and the initialisation of done_bytes, done_ratio, traits, headings
# and human_trait are not visible here -- confirm against the full file.
9 this_job = arvados.current_job()
10 this_task = arvados.current_task()
# The input locator comes from the job's script_parameters['input'].
11 this_job_input = this_job['script_parameters']['input']
# Everything written to `out` below goes into the file named here.
13 out = arvados.CollectionWriter()
14 out.set_current_file_name("arvados_objects.json")
# Walk every file in the input collection, line by line.
21 for input_file in arvados.CollectionReader(this_job_input).all_files():
22 for line_number, line in enumerate(input_file.readlines()):
# Progress tracking: done_bytes grows by the length of each line and is
# compared with the size of the current input file.
24 done_bytes += len(line)
25 new_done_ratio = 1.0 * done_bytes / input_file.size()
# Report when line_number is 2, or when the ratio advanced by more than 0.05
# since the last report.
# NOTE(review): the message formats done_ratio, which still holds the value
# from the previous report at this point; new_done_ratio is only copied into
# it on the following line.
26 if line_number == 2 or new_done_ratio - done_ratio > 0.05:
27 sys.stderr.write("progress: %d%% after %d lines\n" % (int(done_ratio * 100), line_number+1))
28 done_ratio = new_done_ratio
# Split the stripped line on tab characters.
# NOTE(review): string.split / string.strip are the function forms from the
# Python 2 `string` module -- presumably this script targets Python 2.
30 words = string.split(string.strip(line), "\t")
# Query traits whose name matches any of the fields on this line; the rest of
# this call and the loop body are not visible in this excerpt.
33 for t in arvados.service.traits().list(
34 where=json.dumps({'name':words}),
# Fields from index 3 onward are treated as trait names here -- presumably
# this branch handles the header line; TODO confirm against the omitted lines.
38 for i, trait_name in enumerate(words[3:], start=3):
39 # find or create trait
40 if trait_name not in traits:
41 traits_match = arvados.service.traits().list(
42 where=json.dumps({'name':trait_name})
# Reuse the first matching trait; the create call below presumably sits in an
# `else:` branch that is not visible here.
44 if len(traits_match) > 0:
45 traits[trait_name] = traits_match[0]
47 traits[trait_name] = arvados.service.traits().create(
48 trait=json.dumps({'name':trait_name})).execute()
# Record the trait in the output file as JSON.
50 out.write(json.dumps(traits[trait_name]))
# Resolve the human for this line: look for a link with link_class
# 'identifier' whose name equals the first field (words[0]).
53 huID_links_match = arvados.service.links().list(
54 where=json.dumps({'link_class':'identifier','name':words[0]})
# If such a link exists, the human's uuid is the link's head_uuid.
56 if len(huID_links_match) > 0:
57 human_uuid = huID_links_match[0]['head_uuid']
# Otherwise (presumably an omitted `else:`) create a human record and an
# identifier link whose head points at it.
59 human = arvados.service.humans().create(
62 huID_link = arvados.service.links().create(
64 'link_class':'identifier',
66 'head_kind':'arvados#human',
67 'head_uuid':human['uuid']
70 human_uuid = human['uuid']
# Fetch this human's existing 'human_trait' links named 'pgp-survey-response'
# and index them by the uuid of the trait they point to (head_uuid).
72 for t in arvados.service.links().list(
75 'tail_uuid':human_uuid,
76 'tail_kind':'arvados#human',
77 'head_kind':'arvados#trait',
78 'link_class':'human_trait',
79 'name':'pgp-survey-response'
82 human_trait[t['head_uuid']] = t
# For each value from field index 3 onward, find the trait via headings[i]
# and reconcile the stored link with the value on this line.
83 for i, trait_value in enumerate(words[3:], start=3):
84 trait_uuid = traits[headings[i]]['uuid']
# Case 1: a link already exists for this trait; update it only when the stored
# value differs, then write the link to the output.
85 if trait_uuid in human_trait:
86 trait_link = human_trait[trait_uuid]
87 if trait_link['properties']['value'] != trait_value:
88 # update database value to match survey response
89 trait_link['properties']['value'] = trait_value
90 arvados.service.links().update(
91 uuid=trait_link['uuid'],
92 link=json.dumps({'properties':trait_link['properties']})
95 out.write(json.dumps(trait_link))
# Case 2: no existing link and an empty input value; the branch body is not
# visible in this excerpt.
96 elif trait_value == '':
97 # nothing in database, nothing in input
# Case 3 (presumably an omitted `else:`): build a new human_trait link holding
# the value, create it through the API and write it to the output.
101 'tail_uuid':human_uuid,
102 'tail_kind':'arvados#human',
103 'head_uuid':traits[headings[i]]['uuid'],
104 'head_kind':'arvados#trait',
105 'link_class':'human_trait',
106 'name':'pgp-survey-response',
107 'properties': { 'value': trait_value }
109 arvados.service.links().create(
110 link=json.dumps(trait_link)
113 out.write(json.dumps(trait_link))
# Finish the output collection and register the result of out.finish() as
# this task's output.
116 this_task.set_output(out.finish())