#!/usr/bin/env python

import arvados
import string
import json
import UserDict
import sys

# Import survey responses from the tab-separated file(s) in the job's input
# collection.  The first line of each file is a heading row; columns from
# index 3 onward name traits.  Every later line describes one human: column 0
# is the identifier used to find (or create) the human, and columns from
# index 3 onward hold that human's value for the trait in the same column.
# Every trait and human_trait link touched is written to
# "arvados_objects.json" as one JSON array, which becomes the task output.

this_job = arvados.current_job()
this_task = arvados.current_task()
this_job_input = this_job['script_parameters']['input']

out = arvados.CollectionWriter()
out.set_current_file_name("arvados_objects.json")
out.write("[\n")
# Empty until the first array element is written, then ",\n".  Routing every
# write through _emit_object keeps the array valid JSON even when the heading
# row contributes no elements.
separator = ""


def _emit_object(obj):
    """Append obj to the output JSON array, preceded by a separator if needed."""
    global separator
    out.write(separator)
    out.write(json.dumps(obj))
    separator = ",\n"


def _find_or_create_trait(trait_name):
    """Return the trait named trait_name, creating it if none exists."""
    matches = arvados.service.traits().list(
        where=json.dumps({'name': trait_name})
        ).execute()['items']
    if matches:
        return matches[0]
    return arvados.service.traits().create(
        trait=json.dumps({'name': trait_name})).execute()


def _find_or_create_human_uuid(identifier):
    """Return the uuid of the human that an identifier link points at.

    If no 'identifier' link named identifier exists, a new human and a new
    identifier link pointing at it are created first.
    """
    matches = arvados.service.links().list(
        where=json.dumps({'link_class': 'identifier', 'name': identifier})
        ).execute()['items']
    if matches:
        return matches[0]['head_uuid']
    human = arvados.service.humans().create(
        human=json.dumps({})
        ).execute()
    arvados.service.links().create(
        link=json.dumps({
            'link_class': 'identifier',
            'name': identifier,
            'head_kind': 'arvados#human',
            'head_uuid': human['uuid'],
            })
        ).execute()
    return human['uuid']


# Trait objects by name, shared across all input files.
traits = {}
for input_file in arvados.CollectionReader(this_job_input).all_files():
    # Progress is measured against the current file's size, so the counters
    # must start over for each file.
    done_bytes = 0
    done_ratio = 0
    file_size = input_file.size()
    for line_number, line in enumerate(input_file.readlines()):

        done_bytes += len(line)
        new_done_ratio = 1.0 * done_bytes / file_size
        # Report once early (third line) and then after every further 5%.
        if line_number == 2 or new_done_ratio - done_ratio > 0.05:
            done_ratio = new_done_ratio
            sys.stderr.write("progress: %d%% after %d lines\n"
                             % (int(done_ratio * 100), line_number + 1))

        words = line.strip().split("\t")
        if line_number == 0:
            headings = words
            # Fetch as many of the named traits as possible in one request;
            # anything still missing is looked up or created individually.
            for t in arvados.service.traits().list(
                where=json.dumps({'name': words}),
                limit=1000
                ).execute()['items']:
                traits[t['name']] = t
            for trait_name in words[3:]:
                if trait_name not in traits:
                    traits[trait_name] = _find_or_create_trait(trait_name)
                _emit_object(traits[trait_name])
        else:
            human_uuid = _find_or_create_human_uuid(words[0])

            # Existing survey-response links for this human, by trait uuid.
            human_trait = {}
            for t in arvados.service.links().list(
                limit=10000,
                where=json.dumps({
                    'tail_uuid': human_uuid,
                    'tail_kind': 'arvados#human',
                    'head_kind': 'arvados#trait',
                    'link_class': 'human_trait',
                    'name': 'pgp-survey-response'
                    })
                ).execute()['items']:
                human_trait[t['head_uuid']] = t

            for i, trait_value in enumerate(words[3:], start=3):
                trait_uuid = traits[headings[i]]['uuid']
                if trait_uuid in human_trait:
                    trait_link = human_trait[trait_uuid]
                    if trait_link['properties']['value'] != trait_value:
                        # update database value to match survey response
                        trait_link['properties']['value'] = trait_value
                        arvados.service.links().update(
                            uuid=trait_link['uuid'],
                            link=json.dumps(
                                {'properties': trait_link['properties']})
                            ).execute()
                    _emit_object(trait_link)
                elif trait_value == '':
                    # nothing in database, nothing in input
                    pass
                else:
                    # Record the object returned by the API rather than the
                    # request body, so the output carries the new link's
                    # uuid like every other object in the array.
                    trait_link = arvados.service.links().create(
                        link=json.dumps({
                            'tail_uuid': human_uuid,
                            'tail_kind': 'arvados#human',
                            'head_uuid': trait_uuid,
                            'head_kind': 'arvados#trait',
                            'link_class': 'human_trait',
                            'name': 'pgp-survey-response',
                            'properties': {'value': trait_value}
                            })
                        ).execute()
                    _emit_object(trait_link)

out.write("\n]\n")
this_task.set_output(out.finish())