19464: Record git info as properties
[arvados.git] / sdk / cwl / arvados_cwl / executor.py
index 07e574e1fe4a8743424c0b2b2847f4533b81162e..35d4622c6e96d63c75f4da346ed1e370a080650a 100644 (file)
@@ -523,11 +523,13 @@ The 'jobs' API is no longer supported.
     def get_git_info(self, tool):
         in_a_git_repo = False
         cwd = None
+        filepath = None
 
         if tool.tool["id"].startswith("file://"):
             # check if git is installed
             try:
-                cwd = os.path.dirname(uri_file_path(tool.tool["id"]))
+                filepath = uri_file_path(tool.tool["id"])
+                cwd = os.path.dirname(filepath)
                 subprocess.run(["git", "log", "--format=%H", "-n1", "HEAD"], cwd=cwd, check=True, capture_output=True, text=True)
                 in_a_git_repo = True
             except Exception as e:
@@ -542,6 +544,9 @@ The 'jobs' API is no longer supported.
             git_branch = subprocess.run(["git", "branch", "--show-current"], cwd=cwd, capture_output=True, text=True).stdout
             git_origin = subprocess.run(["git", "remote", "get-url", "origin"], cwd=cwd, capture_output=True, text=True).stdout
             git_status = subprocess.run(["git", "status", "--untracked-files=no", "--porcelain"], cwd=cwd, capture_output=True, text=True).stdout
+            git_describe = subprocess.run(["git", "describe", "--always"], cwd=cwd, capture_output=True, text=True).stdout
+            git_toplevel = subprocess.run(["git", "rev-parse", "--show-toplevel"], cwd=cwd, capture_output=True, text=True).stdout
+            git_path = filepath[len(git_toplevel):]
 
             gitproperties = {
                 "http://arvados.org/cwl#gitCommit": git_commit.strip(),
@@ -550,6 +555,8 @@ The 'jobs' API is no longer supported.
                 "http://arvados.org/cwl#gitBranch": git_branch.strip(),
                 "http://arvados.org/cwl#gitOrigin": git_origin.strip(),
                 "http://arvados.org/cwl#gitStatus": git_status.strip(),
+                "http://arvados.org/cwl#gitDescribe": git_describe.strip(),
+                "http://arvados.org/cwl#gitPath": git_path.strip(),
             }
         else:
             for g in ("http://arvados.org/cwl#gitCommit",
@@ -557,18 +564,26 @@ The 'jobs' API is no longer supported.
                       "http://arvados.org/cwl#gitCommitter",
                       "http://arvados.org/cwl#gitBranch",
                       "http://arvados.org/cwl#gitOrigin",
-                      "http://arvados.org/cwl#gitStatus"):
+                      "http://arvados.org/cwl#gitStatus",
+                      "http://arvados.org/cwl#gitDescribe",
+                      "http://arvados.org/cwl#gitPath"):
                 if g in tool.metadata:
                     gitproperties[g] = tool.metadata[g]
 
         return gitproperties
 
+    def set_container_request_properties(self, container, properties):  # Merge `properties` into every container request attached to `container`.
+        resp = self.api.container_requests().list(filters=[["container_uuid", "=", container["uuid"]]], select=["uuid", "properties"]).execute(num_retries=self.num_retries)
+        for cr in resp["items"]:  # Arvados list responses hold the matching records under "items", not "item".
+            cr["properties"].update({k.replace("http://arvados.org/cwl#", "arv:"): v for k, v in properties.items()})  # Shorten the CWL namespace prefix to "arv:".
+            self.api.container_requests().update(uuid=cr["uuid"], body={"container_request": {"properties": cr["properties"]}}).execute(num_retries=self.num_retries)
+
     def arv_executor(self, updated_tool, job_order, runtimeContext, logger=None):
         self.debug = runtimeContext.debug
 
         git_info = self.get_git_info(updated_tool)
         if git_info:
-            logger.info("Provenance of %s", updated_tool.tool["id"])
+            logger.info("Git provenance")
             for g in git_info:
                 if git_info[g]:
                     logger.info("  %s: %s", g.split("#", 1)[1], git_info[g])
@@ -606,7 +621,10 @@ The 'jobs' API is no longer supported.
             runtimeContext.intermediate_storage_classes = default_storage_classes
 
         if not runtimeContext.name:
-            runtimeContext.name = self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])
+            self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])
+            if git_info.get("http://arvados.org/cwl#gitDescribe"):
+                self.name = "%s (%s)" % (self.name, git_info.get("http://arvados.org/cwl#gitDescribe"))
+            runtimeContext.name = self.name
 
         if runtimeContext.copy_deps is None and (runtimeContext.create_workflow or runtimeContext.update_workflow):
             # When creating or updating workflow record, by default
@@ -764,6 +782,7 @@ The 'jobs' API is no longer supported.
         current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
         if current_container:
             logger.info("Running inside container %s", current_container.get("uuid"))
+            self.set_container_request_properties(current_container, git_info)
 
         self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
         self.polling_thread = threading.Thread(target=self.poll_states)