19413: Optimize reporting warnings to API server

[arvados.git] / sdk / cwl / arvados_cwl / executor.py
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py

index 778af58ac3f7a1b71c040d5ec4f3332ecba11964..8635d5fcfed8490aa9c8e1e79eacc383d8d1f2e0 100644 (file)
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -252,6 +252,11 @@ The 'jobs' API is no longer supported.
          Called when there's a need to report errors, warnings or just
          activity statuses, for example in the RuntimeStatusLoggingHandler.
          """
+
+        if kind not in ('error', 'warning'):
+            # Ignore any other status kind
+            return
+
          with self.workflow_eval_lock:
              current = None
              try:
@@ -261,32 +266,35 @@ The 'jobs' API is no longer supported.
              if current is None:
                  return
              runtime_status = current.get('runtime_status', {})
-            if kind in ('error', 'warning'):
-                updatemessage = runtime_status.get(kind, "")
-                if not updatemessage:
-                    updatemessage = message
-
-                # Subsequent messages tacked on in detail
-                updatedetail = runtime_status.get(kind+'Detail', "")
-                maxlines = 40
-                if updatedetail.count("\n") < maxlines:
-                    if updatedetail:
-                        updatedetail += "\n"
-                    updatedetail += message + "\n"
-
-                    if detail:
-                        updatedetail += detail + "\n"
-
-                    if updatedetail.count("\n") >= maxlines:
-                        updatedetail += "\nSome messages may have been omitted.  Check the full log."
-
-                runtime_status.update({
-                    kind: updatemessage,
-                    kind+'Detail': updatedetail,
-                })
-            else:
-                # Ignore any other status kind
+
+            original_updatemessage = updatemessage = runtime_status.get(kind, "")
+            if not updatemessage:
+                updatemessage = message
+
+            # Subsequent messages tacked on in detail
+            original_updatedetail = updatedetail = runtime_status.get(kind+'Detail', "")
+            maxlines = 40
+            if updatedetail.count("\n") < maxlines:
+                if updatedetail:
+                    updatedetail += "\n"
+                updatedetail += message + "\n"
+
+                if detail:
+                    updatedetail += detail + "\n"
+
+                if updatedetail.count("\n") >= maxlines:
+                    updatedetail += "\nSome messages may have been omitted.  Check the full log."
+
+            if updatemessage == original_updatemessage and updatedetail == original_updatedetail:
+                # don't waste time doing an update if nothing changed
+                # (usually because we exceeded the max lines)
                  return
+
+            runtime_status.update({
+                kind: updatemessage,
+                kind+'Detail': updatedetail,
+            })
+
              try:
                  self.api.containers().update(uuid=current['uuid'],
                                              body={
@@ -565,8 +573,9 @@ The 'jobs' API is no longer supported.
          self.project_uuid = runtimeContext.project_uuid
  
          # Upload local file references in the job order.
-        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
-                                     updated_tool, job_order, runtimeContext)
+        with Perf(metrics, "upload_job_order"):
+            job_order = upload_job_order(self, "%s input" % runtimeContext.name,
+                                         updated_tool, job_order, runtimeContext)
  
          # the last clause means: if it is a command line tool, and we
          # are going to wait for the result, and always_submit_runner
@@ -581,19 +590,23 @@ The 'jobs' API is no longer supported.
  
          loadingContext = self.loadingContext.copy()
          loadingContext.do_validate = False
+        loadingContext.disable_js_validation = True
          if submitting:
              loadingContext.do_update = False
              # Document may have been auto-updated. Reload the original
              # document with updating disabled because we want to
              # submit the document with its original CWL version, not
              # the auto-updated one.
-            tool = load_tool(updated_tool.tool["id"], loadingContext)
+            with Perf(metrics, "load_tool original"):
+                tool = load_tool(updated_tool.tool["id"], loadingContext)
          else:
              tool = updated_tool
  
          # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
          # Also uploads docker images.
-        merged_map = upload_workflow_deps(self, tool, runtimeContext)
+        logger.info("Uploading workflow dependencies")
+        with Perf(metrics, "upload_workflow_deps"):
+            merged_map = upload_workflow_deps(self, tool, runtimeContext)
  
          # Recreate process object (ArvadosWorkflow or
          # ArvadosCommandTool) because tool document may have been
@@ -602,7 +615,8 @@ The 'jobs' API is no longer supported.
          loadingContext.loader = tool.doc_loader
          loadingContext.avsc_names = tool.doc_schema
          loadingContext.metadata = tool.metadata
-        tool = load_tool(tool.tool, loadingContext)
+        with Perf(metrics, "load_tool"):
+            tool = load_tool(tool.tool, loadingContext)
  
          if runtimeContext.update_workflow or runtimeContext.create_workflow:
              # Create a pipeline template or workflow record and exit.