From f886d003dbb0b0928d4cc77c070c1b65b7ccacce Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Fri, 5 Jul 2013 16:12:35 -0400 Subject: [PATCH] start pgp data tutorial --- apps/workbench/app/views/users/home.html.erb | 8 ++ doc/user/tutorial-trait-search.textile | 132 +++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 doc/user/tutorial-trait-search.textile diff --git a/apps/workbench/app/views/users/home.html.erb b/apps/workbench/app/views/users/home.html.erb index 98b250c272..46dcb627d2 100644 --- a/apps/workbench/app/views/users/home.html.erb +++ b/apps/workbench/app/views/users/home.html.erb @@ -29,6 +29,14 @@
<%= link_to raw('Tutorial: Your first job ➜'), 'http://doc.arvados.org/user/tutorial-job1.html', class: "pull-right btn btn-primary" %> +
+
+ Search PGP data by trait +
+ Find PGP participants who reported a medical condition, and find WGS data for them. +
+ <%= link_to raw('Tutorial: Search PGP data ➜'), 'http://doc.arvados.org/user/tutorial-trait-search.html', class: "pull-right btn btn-primary" %> +
<% end %> <% end %> diff --git a/doc/user/tutorial-trait-search.textile b/doc/user/tutorial-trait-search.textile new file mode 100644 index 0000000000..e614b02898 --- /dev/null +++ b/doc/user/tutorial-trait-search.textile @@ -0,0 +1,132 @@ +--- +layout: default +navsection: userguide +title: "Tutorial: Search PGP data by trait" +navorder: 20 +--- + +h1. Tutorial: Search PGP data by trait + +Here you will use the Python SDK to find public WGS data for people who have a certain medical condition. + +h3. Prerequisites + +* Log in to a VM "using SSH":ssh-access.html +* Put an "API token":api-tokens.html in your @ARVADOS_API_TOKEN@ environment variable +* Put the API host name in your @ARVADOS_API_HOST@ environment variable +* Run the @python@ interactive shell. + +If everything is set up correctly, you will be able to import the arvados SDK: + +
+import arvados
+
+ +...and display your account information: + +
+arvados.service.users().current().execute()
+
+ +h3. More prerequisites + +
+import re
+import json
+
+ +h3. Find traits. + +List traits whose names contain the term "cancer" (the search is case-sensitive): + +
+for t in filter(lambda t: re.search('cancer', t['name']),
+                arvados.service.traits().list(limit=1000).execute()['items']):
+  print t['uuid'], t['name']
+
+ +↓ + +
+...
+1h9kt-q1cn2-nu86efg57kzus5z Cervical cancer
+1h9kt-q1cn2-0tl6w82sog50hlz Breast cancer
+1h9kt-q1cn2-zkdn8edncaagqui Non-melanoma skin cancer
+...
+
+ +We will use the "Non-melanoma skin cancer" trait with UUID @1h9kt-q1cn2-zkdn8edncaagqui@. + +
+trait_uuid = '1h9kt-q1cn2-zkdn8edncaagqui'
+
+ +h3. Find humans. + +List humans who report this condition: + +
+trait_links = arvados.service.links().list(limit=1000,where=json.dumps({
+    'link_class': 'human_trait',
+    'tail_kind': 'arvados#human',
+    'head_uuid': trait_uuid
+  })).execute()['items']
+
+ +The @tail_uuid@ attribute of each of these Links refers to a Human. + +
+map(lambda l: l['tail_uuid'], trait_links)
+
+ +↓ + +
+[u'1h9kt-7a9it-c0uqa4kcdh29wdf', u'1h9kt-7a9it-x4tru6mn40hc6ah',
+u'1h9kt-7a9it-yqb8m5s9cpy88i8', u'1h9kt-7a9it-46sm75w200ngwny',
+u'1h9kt-7a9it-gx85a4tdkpzsg3w', u'1h9kt-7a9it-8cvlaa8909lgeo9',
+u'1h9kt-7a9it-as37qum2pq8vizb', u'1h9kt-7a9it-14fph66z2baqxb9',
+u'1h9kt-7a9it-e9zc7i4crmw3v69', u'1h9kt-7a9it-np7f35hlijlxdmt',
+u'1h9kt-7a9it-j9hqyjwbvo9cojn', u'1h9kt-7a9it-lqxdtm1gynmsv13',
+u'1h9kt-7a9it-zkhhxjfg2o22ywq', u'1h9kt-7a9it-nsjoxqd33lzldw9',
+u'1h9kt-7a9it-ytect4smzcgd4kg', u'1h9kt-7a9it-y6tl353b3jc4tos',
+u'1h9kt-7a9it-98f8qave4f8vbs5', u'1h9kt-7a9it-gd72sh15q0p4wq3',
+u'1h9kt-7a9it-zlx25dscak94q9h', u'1h9kt-7a9it-8gronw4rbgmim01',
+u'1h9kt-7a9it-wclfkjcb23tr5es', u'1h9kt-7a9it-rvp2qe7szfz4dy6',
+u'1h9kt-7a9it-50iffhmpzsktwjm', u'1h9kt-7a9it-ul412id5y31a5o8',
+u'1h9kt-7a9it-732kwkfzylmt4ik', u'1h9kt-7a9it-v9zqxegpblsbtai',
+u'1h9kt-7a9it-kmaraqduit1v5wd', u'1h9kt-7a9it-t1nwtlo1hru5vvq',
+u'1h9kt-7a9it-q3w6j9od4ibpoyl', u'1h9kt-7a9it-qz8vzkuuz97ezwv',
+u'1h9kt-7a9it-t1v8sjz6dm9jmjf', u'1h9kt-7a9it-qe8wrbyvuqs5jew']
+
+ +h3. Find huIDs. + +For now we don't need to look up the Human objects themselves. We just need to look up "identifier" links to find their huIDs: + +
+human_uuids = map(lambda l: l['tail_uuid'], trait_links)
+huid_links = arvados.service.links().list(limit=1000,where=json.dumps({
+    "link_class": "identifier",
+    "head_uuid": human_uuids
+  })).execute()['items']
+map(lambda l: l['name'], huid_links)
+
+ +↓ + +
+[u'huE2E371', u'huDF04CC', u'huD3A569', u'huD09534', u'huD09050', u'huB4883B',
+ u'huB1FD55', u'huAB8707', u'hu9E356F', u'hu94040B', u'hu7A2F1D', u'hu7260DD',
+ u'hu6C3F34', u'hu68F245', u'hu633787', u'hu602487', u'hu5E55F5', u'hu599905',
+ u'hu5917F3', u'hu56B3B6', u'hu553620', u'hu474789', u'hu43860C', u'hu414115',
+ u'hu397733', u'hu34A921', u'hu237A50', u'hu1BD549', u'hu174334', u'hu15402B',
+ u'hu11603C', u'hu01024B']
+
+ +These huIDs let us find public profiles: + +* "https://my.personalgenomes.org/profile/huE2E371":https://my.personalgenomes.org/profile/huE2E371 +* "https://my.personalgenomes.org/profile/huDF04CC":https://my.personalgenomes.org/profile/huDF04CC +* ... + -- 2.30.2