18799: Force UTF-8 encoding for discovery2pydoc output

[arvados.git] / doc / sdk / python / cookbook.html.textile.liquid
diff --git a/doc/sdk/python/cookbook.html.textile.liquid b/doc/sdk/python/cookbook.html.textile.liquid

index aaf23a2849f064d51ae8988cc95822c1c39a4df5..f2d087625e662347d44f2b458159c97a926dfe2b 100644 (file)
--- a/doc/sdk/python/cookbook.html.textile.liquid
+++ b/doc/sdk/python/cookbook.html.textile.liquid
@@ -28,21 +28,29 @@ SPDX-License-Identifier: CC-BY-SA-3.0
  # "Working with collections":#working-with-collections
  ## "Load and update an existing collection":#load-collection
  ## "Create and save a new collection":#create-collection
-## "Read a file from a collection":#download-a-file-from-a-collection
-## "Write a file to a collection":#upload-a-file-into-a-new-collection
+## "Read a file from a collection":#read-a-file-from-a-collection
+## "Download a file from a collection":#download-a-file-from-a-collection
+## "Write a file to a collection":#write-a-file-into-a-new-collection
+## "Upload a file to a collection":#upload-a-file-into-a-new-collection
  ## "Delete a file from a collection":#delete-a-file-from-an-existing-collection
+## "Delete a directory from a collection recursively":#delete-a-directory-from-a-collection
+## "Walk over all files in a collection":#walk-collection
  ## "Copy a file between collections":#copy-files-from-a-collection-to-another-collection
  ## "Combine two or more collections":#combine-two-or-more-collections
  ## "Create a collection sharing link":#sharing-link
  # "Working with containers and workflow runs":#working-with-containers
-## "Get input of a container or CWL workflow run":#get-input-of-a-cwl-workflow
-## "Get output of a container or CWL workflow run":#get-output-of-a-cwl-workflow
+## "Get input of a container":#get-input-of-a-container
+## "Get input of a CWL workflow run":#get-input-of-a-cwl-workflow
+## "Get output of a container":#get-output-of-a-container
+## "Get output of a CWL workflow run":#get-output-of-a-cwl-workflow
  ## "Get logs of a container or CWL workflow run":#get-log-of-a-child-request
  ## "Get status of a container or CWL workflow run":#get-state-of-a-cwl-workflow
  ## "List child requests of a container or CWL workflow run":#list-failed-child-requests
+## "List child requests of a container request":#list-child-requests-of-container-request
  # "Working with the container request queue":#working-with-container-request-queue
  ## "List completed container requests":#list-completed-container-requests
  ## "Cancel a container request":#cancel-a-container-request
+## "Cancel multiple pending container requests":#cancel-all-container-requests
  
  h2(#introduction). Introduction
  
@@ -77,7 +85,8 @@ The API provides a "dedicated groups method named @shared@":{{ site.baseurl }}/a
  
  {% codeblock as python %}
  for item in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.groups().shared,
      # Pass filters to limit what objects are returned.
      # This example returns only subprojects.
@@ -122,7 +131,8 @@ The API provides a "dedicated groups method named @contents@":{{ site.baseurl }}
  {% codeblock as python %}
  current_user = arv_client.users().current().execute()
  for item in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.groups().contents,
      # The UUID of the project whose contents we're listing.
      # Pass a user UUID to list their home project.
@@ -152,7 +162,7 @@ In brief, a permission is represented in Arvados as a link object with the follo
  * @tail_uuid@ identifies the user or role group that receives the permission.
  * @head_uuid@ identifies the Arvados object this permission grants access to.
  
-For details, refer to the "Permissions model documentation":{{ site.baseurl }}/api/permission-model.html. Managing permissions is just a matter of ensuring the desired links exist with the standard @create@, @update@, and @delete@ methods.
+For details, refer to the "Permissions model documentation":{{ site.baseurl }}/api/permission-model.html. Managing permissions is just a matter of ensuring the desired links exist using the standard @create@, @update@, and @delete@ methods.
  
  h3(#grant-permission). Grant permission to an object
  
@@ -181,7 +191,8 @@ To modify an existing permission—for example, to change its access level—fin
  {% codeblock as python %}
  import arvados.util
  for permission in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.links().list,
      filters=[
          # You should use this filter for all permission searches,
@@ -210,7 +221,8 @@ To revoke an existing permission, find the existing link object for the permissi
  {% codeblock as python %}
  import arvados.util
  for permission in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.links().list,
      filters=[
          # You should use this filter for all permission searches,
@@ -229,7 +241,9 @@ for permission in arvados.util.keyset_list_all(
  
  h2(#working-with-properties). Working with properties
  
-Container requests, collections, groups, and links can have metadata properties set through their @properties@ field. These properties may be standardized or limited to a defined vocabulary on your cluster. This section provides basic recipes for working with all kinds of properties. For details, refer to the "Metadata properties API reference":{{ site.baseurl }}/api/properties.html.
+Container requests, collections, groups, and links can have metadata properties set through their @properties@ field. For details, refer to the "Metadata properties API reference":{{ site.baseurl }}/api/properties.html.
+
+An Arvados cluster can be configured to use a metadata vocabulary. If this is set up, the vocabulary defines standard identifiers for specific properties and their values. These identifiers can also have more human-friendly aliases. The cluster can also be configured to use the vocabulary strictly, so clients may _only_ set properties and values that are defined in the vocabulary. For more information about configuring a metadata vocabulary, refer to the "Metadata vocabulary administration documentation":{{ site.baseurl }}/admin/metadata-vocabulary.html.
  
  h3(#update-properties). Update the properties of an object
  
@@ -365,63 +379,61 @@ new_collection.save_new(
  )
  {% endcodeblock %}
  
-h3(#download-a-file-from-a-collection). Read a file from a collection
+h3(#read-a-file-from-a-collection). Read a file from a collection
  
-Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. It returns a file-like object that you can use in many of the same ways you would use any other file object.
+Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. It returns a file-like object that you can use in many of the same ways you would use any other file object. This example prints all non-empty lines from @ExampleFile@ in your collection:
  
  {% codeblock as python %}
  import arvados.collection
  collection = arvados.collection.Collection(...)
  with collection.open('ExampleFile') as my_file:
-    ...
-{% endcodeblock %}
-
-For a low-level example, this code prints all non-empty lines from @ExampleFile@ in your collection:
-
-{% codeblock as python %}
-with collection.open('ExampleFile') as my_file:
+    # Read from my_file as desired.
+    # This example prints all non-empty lines from the file to stdout.
      for line in my_file:
          if not line.isspace():
              print(line, end='')
  {% endcodeblock %}
  
-For a higher-level example, you can pass the returned file object as a source for Python's standard "@shutil.copyfileobj@ function":https://docs.python.org/3/library/shutil.html#shutil.copyfileobj to download it. This code downloads @ExampleFile@ from your collection and saves it to the current working directory as @ExampleDownload@:
+h3(#download-a-file-from-a-collection). Download a file from a collection
+
+Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. Pass a second mode argument like @'rb'@ to open the file in binary mode. It returns a file-like object that you can use in many of the same ways you would use any other file object. You can pass it as a source to Python's standard "@shutil.copyfileobj@ function":https://docs.python.org/3/library/shutil.html#shutil.copyfileobj to download it. This code downloads @ExampleFile@ from your collection and saves it to the current working directory as @ExampleDownload@:
  
  {% codeblock as python %}
+import arvados.collection
  import shutil
+collection = arvados.collection.Collection(...)
  with (
-  collection.open('ExampleFile') as src_file,
-  open('ExampleDownload', 'w') as dst_file,
+  collection.open('ExampleFile', 'rb') as src_file,
+  open('ExampleDownload', 'wb') as dst_file,
  ):
      shutil.copyfileobj(src_file, dst_file)
  {% endcodeblock %}
  
-h3(#upload-a-file-into-a-new-collection). Write a file to a collection
+h3(#write-a-file-into-a-new-collection). Write a file to a collection
  
-Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. Pass a second mode argument like @'w'@ or @'a'@ to write a file in the collection. It returns a file-like object that you can use in many of the same ways you would use any other file object.
+Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. Pass a second mode argument like @'w'@, @'a'@, or @'wb'@ to write a file in the collection. It returns a file-like object that you can use in many of the same ways you would use any other file object. This example writes @Hello, Arvados!@ to a file named @ExampleFile@ in your collection:
  
  {% codeblock as python %}
  import arvados.collection
  collection = arvados.collection.Collection(...)
  with collection.open('ExampleFile', 'w') as my_file:
-    ...
-{% endcodeblock %}
-
-For a low-level example, this code writes @Hello, world!@ to a file named @ExampleHello@ in your collection:
-
-{% codeblock as python %}
-with collection.open('ExampleHello', 'w') as my_file:
-    print("Hello, world!", file=my_file)
+    # Write to my_file as desired.
+    # This example writes "Hello, Arvados!" to the file.
+    print("Hello, Arvados!", file=my_file)
  collection.save_new(...)  # or collection.save() to update an existing collection
  {% endcodeblock %}
  
-For a higher-level example, you can pass the returned file object as a destination for Python's standard "@shutil.copyfileobj@ function":https://docs.python.org/3/library/shutil.html#shutil.copyfileobj to upload a file to a collection. This code reads @ExampleFile@ from the current working directory and uploads it into your collection as @ExampleUpload@:
+h3(#upload-a-file-into-a-new-collection). Upload a file to a collection
+
+Once you have a @Collection@ object, the "@Collection.open@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.open lets you open files from a collection the same way you would open files from disk using Python's built-in @open@ function. Pass a second mode argument like @'w'@, @'a'@, or @'wb'@ to write a file in the collection. It returns a file-like object that you can use in many of the same ways you would use any other file object. You can pass it as a destination to Python's standard "@shutil.copyfileobj@ function":https://docs.python.org/3/library/shutil.html#shutil.copyfileobj to upload data from a source file. This example reads @ExampleFile@ from the current working directory and uploads it into your collection as @ExampleUpload@:
  
  {% codeblock as python %}
+import arvados.collection
  import shutil
+collection = arvados.collection.Collection(...)
  with (
-  open('ExampleFile') as src_file,
-  collection.open('ExampleUpload', 'w') as dst_file,
+  open('ExampleFile', 'rb') as src_file,
+  collection.open('ExampleUpload', 'wb') as dst_file,
  ):
      shutil.copyfileobj(src_file, dst_file)
  collection.save_new(...)  # or collection.save() to update an existing collection
@@ -438,7 +450,9 @@ collection.remove('ExamplePath')
  collection.save_new(...)  # or collection.save() to update an existing collection
  {% endcodeblock %}
  
-Like most Unix tools, @Collection.remove@ will raise an error if you try to remove a non-empty directory. Pass @recursive=True@ to delete everything under that directory from the collection:
+h3(#delete-a-directory-from-a-collection). Delete a directory from a collection recursively
+
+Once you have a @Collection@ object, call the "@Collection.remove@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.Collection.remove with a directory path and @recursive=True@ to delete everything under that directory from the collection.
  
  {% codeblock as python %}
  import arvados.collection
@@ -447,6 +461,32 @@ collection.remove('ExampleDirectoryPath', recursive=True)
  collection.save_new(...)  # or collection.save() to update an existing collection
  {% endcodeblock %}
  
+h3(#walk-collection). Walk over all files in a collection
+
+Once you have a @Collection@ object, you can iterate over it to retrieve the names of all files and streams in it. Streams are like subdirectories: you can open them using the "@Collection.find@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.find, and work with the files in them just like you would in the original collection. This example shows how to combine these techniques to iterate over all files in a collection, including its streams.
+
+{% codeblock as python %}
+import arvados.collection
+import collections
+import pathlib
+root_collection = arvados.collection.Collection(...)
+# Start work from the base stream.
+stream_queue = collections.deque(['.'])
+while stream_queue:
+    stream_name = stream_queue.popleft()
+    collection = root_collection.find(stream_name)
+    for item_name in collection:
+        try:
+            my_file = collection.open(item_name)
+        except IsADirectoryError:
+            # item_name refers to a stream. Queue it to walk later.
+            stream_path = pathlib.Path(stream_name, item_name)
+            stream_queue.append(stream_path.as_posix())
+            continue
+        with my_file:
+            ...  # Work with my_file as desired
+{% endcodeblock %}
+
  h3(#copy-files-from-a-collection-to-another-collection). Copy a file between collections
  
  Once you have one or more @Collection@ objects, call the "@Collection.copy@ method":{{ site.baseurl }}/sdk/python/arvados/collection.html#arvados.collection.RichCollectionBase.copy on the destination collection to copy files to it. This method doesn't re-upload data, so it's very efficient.
@@ -549,9 +589,9 @@ If you have experience running CWL workflows on Workbench 2, it runs through thi
  
  The UUID of the CWL runner container is recorded in the @requesting_container_uuid@ field of each container request it creates. You can list container requests with a filter on this field to inspect each step of the workflow individually, as shown below.
  
-The next few sections include two examples: a high-level example that describes how to work with any container request, and a more specific example that provides more detail about how to work with CWL workflow runs.
+The next few examples show how to perform a task with a container request generally, and then provide a more specific example of working with a CWL runner container.
  
-h3(#get-input-of-a-cwl-workflow). Get input of a container or CWL workflow run
+h3(#get-input-of-a-container). Get input of a container
  
  A container request's most varied inputs are recorded in the @mounts@ field, which can include data from Keep, specific collections, Git checkouts, and static files. You might also be interested in the @environment@, @command@, @container_image@, and @secret_mounts@ fields. Refer to the "container requests API documentation":{{ site.baseurl }}/api/methods/container_requests.html for details.
  
@@ -576,7 +616,9 @@ for mount_name, mount_source in container_request['mounts'].items():
          pprint.pprint(mount_source.get('content'))
  {% endcodeblock %}
  
-When you run a CWL workflow, the CWL inputs are stored in a JSON mount named @/var/lib/cwl/cwl.input.json@.
+h3(#get-input-of-a-cwl-workflow). Get input of a CWL workflow run
+
+When you run a CWL workflow, the CWL inputs are stored in the container request's @mounts@ field as a JSON mount named @/var/lib/cwl/cwl.input.json@.
  
  {% codeblock as python %}
  container_request = arv_client.container_requests().get(
@@ -586,7 +628,7 @@ cwl_input = container_request['mounts']['/var/lib/cwl/cwl.input.json']['content'
  ...  # Work with the cwl_input dictionary
  {% endcodeblock %}
  
-h3(#get-output-of-a-cwl-workflow). Get output of a container or CWL workflow run
+h3(#get-output-of-a-container). Get output of a container
  
  A container's output files are saved in a collection. The UUID of that collection is recorded in the @output_uuid@ of the container request, which you can load as you like.
  
@@ -601,7 +643,9 @@ container_output = arvados.collection.Collection(
  ...  # Work with the container_output collection object
  {% endcodeblock %}
  
-When you run a CWL workflow, the output collection includes a file named @cwl.output.json@ that provides additional information about other files in the output.
+h3(#get-output-of-a-cwl-workflow). Get output of a CWL workflow run
+
+When you run a CWL workflow, the container request's output collection includes a file named @cwl.output.json@ that provides additional information about other files in the output.
  
  {% codeblock as python %}
  import arvados.collection
@@ -675,7 +719,8 @@ When a running container creates a container request to do additional work, the
  {% codeblock as python %}
  import arvados.util
  for child_container_requests in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.container_requests().list,
      filters=[
          # Note this is a container UUID, *not* a container request UUID
@@ -689,7 +734,9 @@ for child_container_requests in arvados.util.keyset_list_all(
      ...  # Work with each child container request
  {% endcodeblock %}
  
-If your input only provides the UUID for a container request rather than a container, you can get that container request, then follow the @container_uuid@ field if it is set. (It might not be if the container request has not been dispatched yet.)
+h3(#list-child-requests-of-container-request). List child requests of a container request
+
+When a running container creates a container request to do additional work, the UUID of the source container is recorded in the @requesting_container_uuid@ field of the new container request. If all you have is the UUID of a container request, you can get that request, then list container requests with a filter where @requesting_container_uuid@ matches the @container_uuid@ of your request to find all its children.
  
  {% codeblock as python %}
  import arvados.util
@@ -702,7 +749,8 @@ if parent_container_uuid is None:
      child_container_requests = ()
  else:
      child_container_requests = arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+        # Pass the method keyset_list_all will call to retrieve items.
+        # Do not call it yourself.
          arv_client.container_requests().list,
          filters=[
              ['requesting_container_uuid', '=', parent_container_uuid],
@@ -731,7 +779,8 @@ time_filter = datetime.datetime.utcnow()
  time_filter -= datetime.timedelta(days=7)
  
  for container_request in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.container_requests().list,
      filters=[
          # This is the filter you need to find completed container requests.
@@ -769,12 +818,15 @@ cancelled_container_request = arv_client.container_requests().update(
  ).execute()
  {% endcodeblock %}
  
-p(#cancel-all-container-requests). If you want to cancel many container requests, you can list container requests with the @state@ field set to @"Committed"@, a @priority@ greater than zero, and any other filters you like. Then update each container request in turn.
+h3(#cancel-all-container-requests). Cancel multiple pending container requests
+
+If you want to cancel multiple pending container requests, you can list container requests with the @state@ field set to @"Committed"@, a @priority@ greater than zero, and any other filters you like. Then update each container request to set its @priority@ field to 0. See the "container requests API reference":{{ site.baseurl }}/api/methods/container_requests.html for details.
  
  {% codeblock as python %}
  import arvados.util
  for container_request in arvados.util.keyset_list_all(
-    # Do *not* call the method here, just pass it.
+    # Pass the method keyset_list_all will call to retrieve items.
+    # Do not call it yourself.
      arv_client.container_requests().list,
      filters=[
          # These are the filters you need to find cancellable container requests.