--- layout: default navsection: sdk navmenu: Python title: Code cookbook ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} # "Cancel a container request":#cancel-a-container-request # "Cancel all container requests":#cancel-all-container-requests # "List completed container requests":#list-completed-container-requests # "Get input of a CWL workflow":#get-input-of-a-cwl-workflow # "Get output of a CWL workflow":#get-output-of-a-cwl-workflow # "Get state of a CWL workflow":#get-state-of-a-cwl-workflow # "List input of child requests":#list-input-of-child-requests # "List output of child requests":#list-output-of-child-requests # "List failed child requests":#list-failed-child-requests # "Get log of a child request":#get-log-of-a-child-request # "Create a collection sharing link":#sharing_link # "Combine two or more collections":#combine-two-or-more-collections # "Upload a file into a new collection":#upload-a-file-into-a-new-collection # "Download a file from a collection":#download-a-file-from-a-collection # "Copy files from a collection to a new collection":#copy-files-from-a-collection-to-a-new-collection # "Copy files from a collection to another collection":#copy-files-from-a-collection-to-another-collection # "Delete a file from an existing collection":#delete-a-file-from-an-existing-collection # "Listing records with paging":#listing-records-with-paging # "Querying the vocabulary definition":#querying-the-vocabulary-definition # "Translating between vocabulary identifiers and labels":#translating-between-vocabulary-identifiers-and-labels # "Create a Project":#create-a-project h2(#cancel-a-container-request). Cancel a container request {% codeblock as python %} import arvados arvados.api().container_requests().update(uuid=container_request_uuid, body={"priority": 0}).execute() {% endcodeblock %} h2(#cancel-all-container-requests). 
Cancel all container requests {% codeblock as python %} import arvados api = arvados.api() result = api.container_requests().list(filters=[["state", "=", "Committed"], ["priority", ">", "0"]]).execute()["items"] for container_request in result: api.container_requests().update(uuid=container_request["uuid"], body={"priority": 0}).execute() {% endcodeblock %} h2(#list-completed-container-requests). List completed container requests {% codeblock as python %} import arvados api = arvados.api() result = api.container_requests().list(filters=[["name", "like", name], ["state", "=", "Final"]]).execute()["items"] container_uuids = [cr["container_uuid"] for cr in result] containers = api.containers().list(filters=[["uuid", "in", container_uuids]]).execute()["items"] container_dict = {c["uuid"]: c for c in containers} for container_request in result: container = container_dict[container_request["container_uuid"]] print("%s, %s, %s" % (container_request["uuid"], container_request["name"], "Success" if container["exit_code"] == 0 else "Failed")) {% endcodeblock %} h2(#get-input-of-a-cwl-workflow). Get input of a CWL workflow {% codeblock as python %} import arvados api = arvados.api() container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz" container_request = api.container_requests().get(uuid=container_request_uuid).execute() print(container_request["mounts"]["/var/lib/cwl/cwl.input.json"]) {% endcodeblock %} h2(#get-output-of-a-cwl-workflow). Get output of a CWL workflow {% codeblock as python %} import arvados import arvados.collection api = arvados.api() container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz" container_request = api.container_requests().get(uuid=container_request_uuid).execute() collection = arvados.collection.CollectionReader(container_request["output_uuid"]) print(collection.open("cwl.output.json").read()) {% endcodeblock %} h2(#get-state-of-a-cwl-workflow). 
Get state of a CWL workflow {% codeblock as python %} import arvados def get_cr_state(cr_uuid): api = arvados.api() cr = api.container_requests().get(uuid=cr_uuid).execute() if cr['container_uuid'] is None: return cr['state'] c = api.containers().get(uuid=cr['container_uuid']).execute() if cr['state'] == 'Final' and c['state'] != 'Complete': return 'Cancelled' elif c['state'] in ['Locked', 'Queued']: if c['priority'] == 0: return 'On hold' else: return 'Queued' elif c['state'] == 'Complete' and c['exit_code'] != 0: return 'Failed' elif c['state'] == 'Running': if c['runtime_status'].get('error', None): return 'Failing' elif c['runtime_status'].get('warning', None): return 'Warning' return c['state'] container_request_uuid = 'zzzzz-xvhdp-zzzzzzzzzzzzzzz' print(get_cr_state(container_request_uuid)) {% endcodeblock %} h2(#list-input-of-child-requests). List input of child requests {% codeblock as python %} import arvados api = arvados.api() parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz" namefilter = "bwa%" # the "like" filter uses SQL pattern match syntax container_request = api.container_requests().get(uuid=parent_request_uuid).execute() parent_container_uuid = container_request["container_uuid"] child_requests = api.container_requests().list(filters=[ ["requesting_container_uuid", "=", parent_container_uuid], ["name", "like", namefilter]]).execute() for c in child_requests["items"]: print("%s" % c["name"]) for m in c["mounts"].values(): if "portable_data_hash" in m: print(" %s" % m["portable_data_hash"]) {% endcodeblock %} h2(#list-output-of-child-requests). 
List output of child requests {% codeblock as python %} import arvados api = arvados.api() parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz" namefilter = "bwa%" # the "like" filter uses SQL pattern match syntax container_request = api.container_requests().get(uuid=parent_request_uuid).execute() parent_container_uuid = container_request["container_uuid"] child_requests = api.container_requests().list(filters=[ ["requesting_container_uuid", "=", parent_container_uuid], ["name", "like", namefilter]]).execute() output_uuids = [c["output_uuid"] for c in child_requests["items"]] collections = api.collections().list(filters=[["uuid", "in", output_uuids]]).execute() uuid_to_pdh = {c["uuid"]: c["portable_data_hash"] for c in collections["items"]} for c in child_requests["items"]: print("%s -> %s" % (c["name"], uuid_to_pdh[c["output_uuid"]])) {% endcodeblock %} h2(#list-failed-child-requests). List failed child requests {% codeblock as python %} import arvados api = arvados.api() parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz" container_request = api.container_requests().get(uuid=parent_request_uuid).execute() parent_container_uuid = container_request["container_uuid"] child_requests = api.container_requests().list(filters=[ ["requesting_container_uuid", "=", parent_container_uuid]], limit=1000).execute() child_containers = {c["container_uuid"]: c for c in child_requests["items"]} cancelled_child_containers = api.containers().list(filters=[ ["exit_code", "!=", "0"], ["uuid", "in", list(child_containers.keys())]], limit=1000).execute() for c in cancelled_child_containers["items"]: print("%s (%s)" % (child_containers[c["uuid"]]["name"], child_containers[c["uuid"]]["uuid"])) {% endcodeblock %} h2(#get-log-of-a-child-request). 
Get log of a child request {% codeblock as python %} import arvados import arvados.collection api = arvados.api() container_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz" container_request = api.container_requests().get(uuid=container_request_uuid).execute() collection = arvados.collection.CollectionReader(container_request["log_uuid"]) for c in collection: if isinstance(collection.find(c), arvados.arvfile.ArvadosFile): print(collection.open(c).read()) {% endcodeblock %} h2(#sharing_link). Create a collection sharing link {% codeblock as python %} import arvados api = arvados.api() download="https://your.download.server" collection_uuid="zzzzz-4zz18-zzzzzzzzzzzzzzz" token = api.api_client_authorizations().create(body={"api_client_authorization":{"scopes": [ "GET /arvados/v1/collections/%s" % collection_uuid, "GET /arvados/v1/collections/%s/" % collection_uuid, "GET /arvados/v1/keep_services/accessible"]}}).execute() print("%s/c=%s/t=%s/_/" % (download, collection_uuid, token["api_token"])) {% endcodeblock %} h2(#combine-two-or-more-collections). Combine two or more collections Note, if two collections have files of the same name, the contents will be concatenated in the resulting manifest. {% codeblock as python %} import arvados import arvados.collection api = arvados.api() project_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz" collection_uuids = ["zzzzz-4zz18-aaaaaaaaaaaaaaa", "zzzzz-4zz18-bbbbbbbbbbbbbbb"] combined_manifest = "" for u in collection_uuids: c = api.collections().get(uuid=u).execute() combined_manifest += c["manifest_text"] newcol = arvados.collection.Collection(combined_manifest) newcol.save_new(name="My combined collection", owner_uuid=project_uuid) {% endcodeblock %} h2(#upload-a-file-into-a-new-collection). 
Upload a file into a new collection {% codeblock as python %} import arvados import arvados.collection project_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz" collection_name = "My collection" filename = "file1.txt" api = arvados.api() c = arvados.collection.Collection() with open(filename, "rb") as reader: with c.open(filename, "wb") as writer: content = reader.read(128*1024) while content: writer.write(content) content = reader.read(128*1024) c.save_new(name=collection_name, owner_uuid=project_uuid) print("Saved %s to %s" % (collection_name, c.manifest_locator())) {% endcodeblock %} h2(#download-a-file-from-a-collection). Download a file from a collection {% codeblock as python %} import arvados import arvados.collection collection_uuid = "zzzzz-4zz18-zzzzzzzzzzzzzzz" filename = "file1.txt" api = arvados.api() c = arvados.collection.CollectionReader(collection_uuid) with c.open(filename, "rb") as reader: with open(filename, "wb") as writer: content = reader.read(128*1024) while content: writer.write(content) content = reader.read(128*1024) print("Finished downloading %s" % filename) {% endcodeblock %} h2(#copy-files-from-a-collection-to-a-new-collection). Copy files from a collection to a new collection {% codeblock as python %} import arvados.collection source_collection = "zzzzz-4zz18-zzzzzzzzzzzzzzz" target_project = "zzzzz-j7d0g-zzzzzzzzzzzzzzz" target_name = "Files copied from source_collection" files_to_copy = ["folder1/sample1/sample1_R1.fastq", "folder1/sample2/sample2_R1.fastq"] source = arvados.collection.CollectionReader(source_collection) target = arvados.collection.Collection() for f in files_to_copy: target.copy(f, "", source_collection=source) target.save_new(name=target_name, owner_uuid=target_project) print("Created collection %s" % target.manifest_locator()) {% endcodeblock %} h2(#copy-files-from-a-collection-to-another-collection). 
Copy files from a collection to another collection {% codeblock as python %} import arvados.collection source_collection = "zzzzz-4zz18-zzzzzzzzzzzzzzz" target_collection = "zzzzz-4zz18-aaaaaaaaaaaaaaa" files_to_copy = ["folder1/sample1/sample1_R1.fastq", "folder1/sample2/sample2_R1.fastq"] source = arvados.collection.CollectionReader(source_collection) target = arvados.collection.Collection(target_collection) for f in files_to_copy: target.copy(f, "", source_collection=source) target.save() {% endcodeblock %} h2(#delete-a-file-from-an-existing-collection). Delete a file from an existing collection {% codeblock as python %} import arvados c = arvados.collection.Collection("zzzzz-4zz18-zzzzzzzzzzzzzzz") c.remove("file2.txt") c.save() {% endcodeblock %} h2(#listing-records-with-paging). Listing records with paging Use the @arvados.util.keyset_list_all@ helper method to iterate over all the records matching an optional filter. This method handles paging internally and returns results incrementally using a Python iterator. The first parameter of the method takes a @list@ method of an Arvados resource (@collections@, @container_requests@, etc). {% codeblock as python %} import arvados.util api = arvados.api() for c in arvados.util.keyset_list_all(api.collections().list, filters=[["name", "like", "%sample123%"]]): print("got collection " + c["uuid"]) {% endcodeblock %} h2(#querying-the-vocabulary-definition). Querying the vocabulary definition The Python SDK provides facilities to interact with the "active metadata vocabulary":{{ site.baseurl }}/admin/metadata-vocabulary.html in the system. 
The developer can do key and value lookups in a case-insensitive manner: {% codeblock as python %} from arvados import api, vocabulary voc = vocabulary.load_vocabulary(api('v1')) [k.identifier for k in set(voc.key_aliases.values())] # Example output: ['IDTAGCOLORS', 'IDTAGFRUITS', 'IDTAGCOMMENT', 'IDTAGIMPORTANCES', 'IDTAGCATEGORIES', 'IDTAGSIZES', 'IDTAGANIMALS'] voc['IDTAGSIZES'].preferred_label # Example output: 'Size' [v.preferred_label for v in set(voc['size'].value_aliases.values())] # Example output: ['S', 'M', 'L', 'XL', 'XS'] voc['size']['s'].aliases # Example output: ['S', 'small'] voc['size']['Small'].identifier # Example output: 'IDVALSIZES2' {% endcodeblock %} h2(#translating-between-vocabulary-identifiers-and-labels). Translating between vocabulary identifiers and labels Client software might need to present properties to the user in a human-readable form or take input from the user without requiring them to remember identifiers. For these cases, there are a couple of conversion methods that take a dictionary as input, like this: {% codeblock as python %} from arvados import api, vocabulary voc = vocabulary.load_vocabulary(api('v1')) voc.convert_to_labels({'IDTAGIMPORTANCES': 'IDVALIMPORTANCES1'}) # Example output: {'Importance': 'Critical'} voc.convert_to_identifiers({'creature': 'elephant'}) # Example output: {'IDTAGANIMALS': 'IDVALANIMALS3'} {% endcodeblock %} h2(#create-a-project). Create a Project {% codeblock as python %} import arvados parent_project_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz" project_name = "My project" g = arvados.api().groups().create(body={ "group": { "group_class": "project", "owner_uuid": parent_project_uuid, "name": project_name, }}).execute() print("New project uuid is", g["uuid"]) {% endcodeblock %}