9865: Simplifies ACR exception handling
author Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Fri, 22 Feb 2019 20:42:45 +0000 (15:42 -0500)
committer Eric Biagiotti <ebiagiotti@veritasgenetics.com>
Fri, 22 Feb 2019 20:42:45 +0000 (15:42 -0500)
- Ensures stack trace info is included where appropriate
- Adds comments clarifying why stack traces are excluded in some cases
- Removes exceptions from formatted log strings, since logger.exception appends them automatically (sketched below)
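
For reference, a minimal standalone sketch of that last point (illustrative code, not part of this commit): logger.exception logs at ERROR level and appends the active traceback automatically, so the exception object no longer needs to be interpolated into the message.

    import logging

    logging.basicConfig()
    logger = logging.getLogger("example")

    try:
        1 / 0
    except Exception:
        # The traceback is appended automatically by logger.exception;
        # interpolating the exception into the message would duplicate it.
        logger.exception("Error creating the Arvados CWL Executor")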

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/runner.py

index 52fd4d21a115f29ee7fb388c11ebeb0564b40ae2..834ca195fdda02d859eafcd4aa0cea5c70c1c359 100644 (file)
@@ -289,10 +289,12 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
-    except Exception as e:
-        logger.error(e)
+    except Exception:
+        logger.exception("Error creating the Arvados CWL Executor")
         return 1
 
+    # Note that unless in debug mode, some stack traces related to user
+    # workflow errors may be suppressed. See ArvadosJob.done().
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
         logging.getLogger('arvados').setLevel(logging.DEBUG)
index af7c02a8f30010bfe85e51a6928e63a5a617d37e..5fe29bc8f225a8f56da72dee262279a4e9357e74 100644 (file)
@@ -304,8 +304,8 @@ class ArvadosContainer(JobBase):
                 logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
             else:
                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
-            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
+        except Exception:
+            logger.exception("%s got an error", self.arvrunner.label(self))
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
@@ -353,11 +353,13 @@ class ArvadosContainer(JobBase):
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
+            # Only include a stack trace if in debug mode.
+            # A stack trace may obscure more useful output about the workflow.
             logger.error("%s unable to collect output from %s:\n%s",
                          self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
             processStatus = "permanentFail"
-        except Exception as e:
-            logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting output object:", self.arvrunner.label(self))
             processStatus = "permanentFail"
         finally:
             self.output_callback(outputs, processStatus)
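
The exc_info argument in the hunk above is what makes the stack trace conditional. A minimal sketch of the pattern, assuming a runner object with a boolean debug attribute (names here are illustrative stand-ins, not the real Arvados classes):

    import logging

    logger = logging.getLogger("example")

    def report_collect_failure(runner, label, output, e):
        # Passing the exception as exc_info attaches its traceback;
        # passing False suppresses the traceback entirely.
        logger.error("%s unable to collect output from %s:\n%s",
                     label, output, e,
                     exc_info=(e if runner.debug else False))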
@@ -523,8 +525,8 @@ class RunnerContainer(Runner):
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]
             ).execute(num_retries=self.arvrunner.num_retries)
-        except Exception as e:
-            logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting runner container", self.arvrunner.label(self))
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             super(RunnerContainer, self).done(container)
index 84006b47d2a8ba86fd97f88b63772a53e3d711f6..a8f56ad1d4f30db21c5a59f0fb6df9258c723c58 100644 (file)
@@ -63,6 +63,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                 arvados.commands.put.api_client = api_client
                 arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
             except SystemExit as e:
+                # If e.code is None or zero, then keepdocker exited normally and we can continue
                 if e.code:
                     raise WorkflowException("keepdocker exited with code %s" % e.code)
 
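For context on the e.code check above: sys.exit() raises SystemExit, whose code attribute is None for a bare exit() and 0 on success, both falsy, so only a nonzero code signals failure. A minimal sketch (run_tool is a hypothetical stand-in for the keepdocker entry point):

    import sys

    def run_tool():
        # Hypothetical CLI entry point that terminates via sys.exit().
        sys.exit(0)

    try:
        run_tool()
    except SystemExit as e:
        # None or zero means a normal exit; only nonzero codes are errors.
        if e.code:
            raise RuntimeError("tool exited with code %s" % e.code)
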
index 69fe7e2a8f1b632675b16eeb3f6ad07ee76c00e8..086e299a567ec94cbd138af4afd29c578a9c08c3 100644 (file)
@@ -199,7 +199,7 @@ class ArvadosJob(JobBase):
                                     e)
             else:
                 logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
+        except Exception:
             logger.exception("%s error" % (self.arvrunner.label(self)))
             self.output_callback({}, "permanentFail")
 
@@ -224,8 +224,8 @@ class ArvadosJob(JobBase):
                             body={
                                 "components": components
                             }).execute(num_retries=self.arvrunner.num_retries)
-                except Exception as e:
-                    logger.info("Error adding to components: %s", e)
+                except Exception:
+                    logger.exception("Error adding to components")
 
     def done(self, record):
         try:
@@ -272,10 +272,12 @@ class ArvadosJob(JobBase):
                         outputs = done.done(self, record, dirs["tmpdir"],
                                             dirs["outdir"], dirs["keep"])
             except WorkflowException as e:
+                # Only include a stack trace if in debug mode.
+                # This is most likely a user workflow error, and a stack trace may obscure more useful output.
                 logger.error("%s unable to collect output from %s:\n%s",
                              self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
                 processStatus = "permanentFail"
-            except Exception as e:
+            except Exception:
                 logger.exception("Got unknown exception while collecting output for job %s:", self.name)
                 processStatus = "permanentFail"
 
index 535cfd7582b985ad806d659f518f9da9ce0e6fbc..ce7ea1ad6acbfd851d98fcbe2097ca640047d5f2 100644 (file)
@@ -506,8 +506,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                               body={
                                                   'is_trashed': True
                                               }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Setting container output: %s", e)
+            except Exception:
+                logger.exception("Setting container output")
         elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
             self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
                                    body={
index ad8e903ba65f28bec56710a3ed15915482362d72..65bce95f78c6742de94f230274e28dfd9316e093 100644 (file)
@@ -474,8 +474,8 @@ class Runner(Process):
                     fileobj["location"] = "keep:%s/%s" % (record["output"], path)
             adjustFileObjs(outputs, keepify)
             adjustDirObjs(outputs, keepify)
-        except Exception as e:
-            logger.exception("[%s] While getting final output object: %s", self.name, e)
+        except Exception:
+            logger.exception("[%s] While getting final output object", self.name)
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             self.arvrunner.output_callback(outputs, processStatus)