9552: If compute_checksum is true, check if checksum needs to be computed on final...
author Peter Amstutz <peter.amstutz@curoverse.com>
Thu, 4 Aug 2016 14:23:12 +0000 (10:23 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Thu, 4 Aug 2016 14:27:48 +0000 (10:27 -0400)
sdk/cwl/arvados_cwl/__init__.py

index ce1cc26eeeaaf3236d7936d74cb249ef9fec8c89..99db9f1026863d4f1bf4def8fae92223ca7afdfd 100644 (file)
@@ -8,6 +8,7 @@ import logging
 import os
 import sys
 import threading
+import hashlib
 import pkg_resources  # part of setuptools
 
 from cwltool.errors import WorkflowException
@@ -24,6 +25,7 @@ from .arvtool import ArvadosCommandTool
 from .fsaccess import CollectionFsAccess
 
 from cwltool.process import shortname, UnsupportedRequirement
+from cwltool.pathmapper import adjustFileObjs
 from arvados.api import OrderedJsonModel
 
 logger = logging.getLogger('arvados.cwl-runner')
@@ -225,6 +227,19 @@ class ArvCwlRunner(object):
         if self.final_output is None:
             raise WorkflowException("Workflow did not return a result.")
 
+        if kwargs.get("compute_checksum"):
+            def compute_checksums(fileobj):
+                if "checksum" not in fileobj:
+                    checksum = hashlib.sha1()
+                    with self.fs_access.open(fileobj["location"], "rb") as f:
+                        contents = f.read(1024*1024)
+                        while contents != "":
+                            checksum.update(contents)
+                            contents = f.read(1024*1024)
+                    fileobj["checksum"] = "sha1$%s" % checksum.hexdigest()
+
+            adjustFileObjs(self.final_output, compute_checksums)
+
         return self.final_output