6706: Clear inode/inode cache dicts on destroy() instead of setting inodes to None.

[arvados.git] / doc / _includes / _tutorial_hash_script_py.liquid
diff --git a/doc/_includes/_tutorial_hash_script_py.liquid b/doc/_includes/_tutorial_hash_script_py.liquid

index 0dcabaefc14a0d38f4883708243823ef5a543826..ede28091de45f1b14556b721722153c3b1f3d70f 100644 (file)
--- a/doc/_includes/_tutorial_hash_script_py.liquid
+++ b/doc/_includes/_tutorial_hash_script_py.liquid
@@ -1,45 +1,45 @@
  #!/usr/bin/env python
  
-import hashlib      # Import the hashlib module to compute md5.
+import hashlib      # Import the hashlib module to compute MD5.
+import os           # Import the os module for basic path manipulation
  import arvados      # Import the Arvados sdk module
  
  # Automatically parallelize this job by running one task per file.
  # This means that if the input consists of many files, each file will
-# be processed in parallel on different nodes enabling the job to 
+# be processed in parallel on different nodes enabling the job to
  # be completed quicker.
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, 
+arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
                                            input_as_path=True)
  
-# Create the message digest object that will compute the md5 hash
+# Get object representing the current task
+this_task = arvados.current_task()
+
+# Create the message digest object that will compute the MD5 hash
  digestor = hashlib.new('md5')
  
  # Get the input file for the task
-input_file = arvados.get_task_param_mount('input')
+input_id, input_path = this_task['parameters']['input'].split('/', 1)
  
-# Open the input file for reading
-with open(input_file) as f:
-    while True:
-        buf = f.read(2**20)      # read a 1 megabyte block from the file
-        if len(buf) == 0:        # break when there is no more data left
-            break
-        digestor.update(buf)     # update the md5 hash object
+# Open the input collection
+input_collection = arvados.CollectionReader(input_id)
  
-# Get object representing the current task
-this_task = arvados.current_task()
+# Open the input file for reading
+with input_collection.open(input_path) as input_file:
+    for buf in input_file.readall():  # Iterate the file's data blocks
+        digestor.update(buf)          # Update the MD5 hash object
  
- # Write a new collection as output
+# Write a new collection as output
  out = arvados.CollectionWriter()
  
- # Set output file within the collection
-out.set_current_file_name("md5sum.txt")
-
-# Write an output line with the md5 value and input
-out.write("%s %s\n" % (digestor.hexdigest(), this_task['parameters']['input']))
+# Write an output file with one line: the MD5 value and input path
+with out.open('md5sum.txt') as out_file:
+    out_file.write("{} {}/{}\n".format(digestor.hexdigest(), input_id,
+                                       os.path.normpath(input_path)))
  
- # Commit the output to keep.  This returns a Keep id.
-output_id = out.finish()
+# Commit the output to Keep.
+output_locator = out.finish()
  
-# Set the output for this task to the Keep id
-this_task.set_output(output_id) 
+# Use the resulting locator as the output for this task.
+this_task.set_output(output_locator)
  
  # Done!