Merge branch '14870-retry-logs' refs #14870
author     Peter Amstutz <pamstutz@veritasgenetics.com>
           Thu, 7 Mar 2019 17:58:04 +0000 (12:58 -0500)
committer  Peter Amstutz <pamstutz@veritasgenetics.com>
           Thu, 7 Mar 2019 17:58:04 +0000 (12:58 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

build/run-build-test-packages-one-target.sh
doc/admin/upgrading.html.textile.liquid
lib/cloud/azure/azure.go
lib/dispatchcloud/worker/pool.go
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/tests/test_container.py
sdk/python/arvados/commands/keepdocker.py
services/fuse/arvados_fuse/fresh.py

index 7bdaacac52268a49b6fe929f0f467da061682662..b98a4c0a7e0dec9d3c8110712881d05b92f5ec03 100755 (executable)
@@ -14,6 +14,8 @@ Syntax:
 --upload
     If the build and test steps are successful, upload the packages
     to a remote apt repository (default: false)
+--rc
+    Optional parameter to build a release candidate and upload it to the
+    testing repository (default: false)
 --build-version <version>
     Version to build (default:
     \$ARVADOS_BUILDING_VERSION-\$ARVADOS_BUILDING_ITERATION or
@@ -40,7 +42,7 @@ if ! [[ -d "$WORKSPACE" ]]; then
 fi
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,upload,target:,build-version: \
+    help,upload,rc,target:,build-version: \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -48,6 +50,7 @@ fi
 
 TARGET=debian8
 UPLOAD=0
+RC=0
 
 declare -a build_args=()
 
@@ -65,6 +68,9 @@ while [ $# -gt 0 ]; do
         --upload)
             UPLOAD=1
             ;;
+        --rc)
+            RC=1
+            ;;
         --build-version)
             build_args+=("$1" "$2")
             shift
@@ -115,8 +121,13 @@ if [[ "$UPLOAD" != 0 ]]; then
   timer_reset
 
   if [ ${#failures[@]} -eq 0 ]; then
-    echo "/usr/local/arvados-dev/jenkins/run_upload_packages.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET"
-    /usr/local/arvados-dev/jenkins/run_upload_packages.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET
+    if [[ "$RC" != 0 ]]; then
+      echo "/usr/local/arvados-dev/jenkins/run_upload_packages_testing.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET"
+      /usr/local/arvados-dev/jenkins/run_upload_packages_testing.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET
+    else
+      echo "/usr/local/arvados-dev/jenkins/run_upload_packages.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET"
+      /usr/local/arvados-dev/jenkins/run_upload_packages.py -H jenkinsapt@apt.arvados.org -o Port=2222 --workspace $WORKSPACE $TARGET
+    fi
   else
     echo "Skipping package upload, there were errors building and/or testing the packages"
   fi
@@ -124,4 +135,4 @@ if [[ "$UPLOAD" != 0 ]]; then
   title "End of upload packages (`timer`)"
 fi
 
-exit_cleanly
+exit_cleanly
\ No newline at end of file
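
With this change, passing --rc together with --upload routes the built packages to run_upload_packages_testing.py (the testing repository) instead of the production upload script. A hypothetical invocation (the target name is illustrative):

    ./run-build-test-packages-one-target.sh --target debian9 --upload --rc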
index eda25d5cc6678524ea1cd44b0500201f49e56e65..6e2e6cba6dfeb1873d8a58049e1cdb65ac801bdd 100644 (file)
@@ -32,6 +32,63 @@ TODO: extract this information based on git commit messages and generate changel
 
 h3. current master branch
 
+h4. Stricter collection manifest validation on the API server
+
+As a consequence of "#14482":https://dev.arvados.org/issues/14482, the Ruby SDK now performs more rigorous collection manifest validation. Collections created after May 2015 are unlikely to be invalid; however, you can check for invalid manifests using the script below.
+
+You could set up a new rvm gemset and install the specific arvados gem for testing, like so:
+
+<notextile>
+<pre><code>~$ <span class="userinput">rvm gemset create rubysdk-test</span>
+~$ <span class="userinput">rvm gemset use rubysdk-test</span>
+~$ <span class="userinput">gem install arvados -v 1.3.1.20190301212059</span>
+</code></pre>
+</notextile>
+
+Next, you can run the following script using admin credentials; it will scan the whole collection database and report any collection that doesn't pass the check:
+
+{% codeblock as ruby %}
+require 'arvados'
+require 'arvados/keep'
+
+api = Arvados.new
+offset = 0
+batch_size = 100
+invalid = []
+
+while true
+    begin
+        req = api.collection.index(
+            :select => [:uuid, :created_at, :manifest_text],
+            :include_trash => true, :include_old_versions => true,
+            :limit => batch_size, :offset => offset)
+    rescue
+        invalid.each {|c| puts "#{c[:uuid]} (Created at #{c[:created_at]}): #{c[:error]}" }
+        raise
+    end
+
+    req[:items].each do |col|
+        begin
+            Keep::Manifest.validate! col[:manifest_text]
+        rescue Exception => e
+            puts "Collection #{col[:uuid]} manifest not valid"
+            invalid << {uuid: col[:uuid], error: e, created_at: col[:created_at]}
+        end
+    end
+    puts "Checked #{offset} / #{req[:items_available]} - Invalid: #{invalid.size}"
+    offset += req[:limit]
+    break if offset > req[:items_available]
+end
+
+if invalid.empty?
+    puts "No invalid collection manifests found"
+else
+    invalid.each {|c| puts "#{c[:uuid]} (Created at #{c[:created_at]}): #{c[:error]}" }
+end
+{% endcodeblock %}
+
+The script will print a final report listing each invalid collection by UUID, with its creation date and error message, so you can take corrective measures if needed.
+
 h4. Python packaging change
 
 As part of story "#9945":https://dev.arvados.org/issues/9945, the distribution packaging (deb/rpm) of our Python packages has changed. These packages now include a built-in virtualenv to reduce dependencies on system packages. We have also stopped packaging and publishing backports for all the Python dependencies of our packages, as they are no longer needed.
index b88962714e709765f1c93e0a6a88dbcf860aabaa..d19e4bef2372ff87dfb13e26db1ffddb9726bdca 100644 (file)
@@ -220,12 +220,14 @@ func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetI
                return nil, err
        }
 
-       ap := azureInstanceSet{logger: logger}
-       err = ap.setup(azcfg, string(dispatcherID))
+       az := azureInstanceSet{logger: logger}
+       az.ctx, az.stopFunc = context.WithCancel(context.Background())
+       err = az.setup(azcfg, string(dispatcherID))
        if err != nil {
+               az.stopFunc()
                return nil, err
        }
-       return &ap, nil
+       return &az, nil
 }
 
 func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID string) (err error) {
@@ -276,7 +278,6 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
        az.dispatcherID = dispatcherID
        az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
 
-       az.ctx, az.stopFunc = context.WithCancel(context.Background())
        go func() {
                az.stopWg.Add(1)
                defer az.stopWg.Done()
index 14f6a3efced3815f11b19b6e08612ead4326e4f6..fe1c6ecc0304f64345135f6016d29a8b1512fea1 100644 (file)
@@ -565,6 +565,7 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 func (wp *Pool) runMetrics() {
        ch := wp.Subscribe()
        defer wp.Unsubscribe(ch)
+       wp.updateMetrics()
        for range ch {
                wp.updateMetrics()
        }
index 9f93f0a6a6895780b2aca6e5ed235aca64d6aac9..03b4e07c76f5849a97ae85b9bd179e897ec8fc33 100644 (file)
@@ -342,7 +342,7 @@ class ArvadosContainer(JobBase):
             if record["output_uuid"]:
                 if self.arvrunner.trash_intermediate or self.arvrunner.intermediate_output_ttl:
                     # Compute the trash time to avoid requesting the collection record.
-                    trash_at = ciso8601.parse_datetime(record["modified_at"]) + datetime.timedelta(0, self.arvrunner.intermediate_output_ttl)
+                    trash_at = ciso8601.parse_datetime_as_naive(record["modified_at"]) + datetime.timedelta(0, self.arvrunner.intermediate_output_ttl)
                     aftertime = " at %s" % trash_at.strftime("%Y-%m-%d %H:%M:%S UTC") if self.arvrunner.intermediate_output_ttl else ""
                     orpart = ", or" if self.arvrunner.trash_intermediate and self.arvrunner.intermediate_output_ttl else ""
                     oncomplete = " upon successful completion of the workflow" if self.arvrunner.trash_intermediate else ""
index c1f2b54744083a22a52b659797a720da478304b1..319e8a887114b88b55865ca673dbafb3e0b9a7dc 100644 (file)
@@ -59,6 +59,7 @@ class RuntimeStatusLoggingHandler(logging.Handler):
     def __init__(self, runtime_status_update_func):
         super(RuntimeStatusLoggingHandler, self).__init__()
         self.runtime_status_update = runtime_status_update_func
+        self.updatingRuntimeStatus = False
 
     def emit(self, record):
         kind = None
@@ -66,22 +67,27 @@ class RuntimeStatusLoggingHandler(logging.Handler):
             kind = 'error'
         elif record.levelno >= logging.WARNING:
             kind = 'warning'
-        if kind is not None:
-            log_msg = record.getMessage()
-            if '\n' in log_msg:
-                # If the logged message is multi-line, use its first line as status
-                # and the rest as detail.
-                status, detail = log_msg.split('\n', 1)
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, status),
-                    detail
-                )
-            else:
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, record.getMessage())
-                )
+        if kind is not None and not self.updatingRuntimeStatus:
+            self.updatingRuntimeStatus = True
+            try:
+                log_msg = record.getMessage()
+                if '\n' in log_msg:
+                    # If the logged message is multi-line, use its first line as status
+                    # and the rest as detail.
+                    status, detail = log_msg.split('\n', 1)
+                    self.runtime_status_update(
+                        kind,
+                        "%s: %s" % (record.name, status),
+                        detail
+                    )
+                else:
+                    self.runtime_status_update(
+                        kind,
+                        "%s: %s" % (record.name, record.getMessage())
+                    )
+            finally:
+                self.updatingRuntimeStatus = False
+
 
 class ArvCwlExecutor(object):
     """Execute a CWL tool or workflow, submit work (using either jobs or
index de21fc0b92d1c7437978be0a5018c3ea51fd76c3..1a57da3927a352e614f5a65ebb46887864ece07b 100644 (file)
@@ -80,6 +80,23 @@ class TestContainer(unittest.TestCase):
 
         return loadingContext, runtimeContext
 
+    # Helper function to set up the ArvCwlExecutor to use the containers API
+    # and check that the RuntimeStatusLoggingHandler is set up correctly.
+    def setup_and_test_container_executor_and_logging(self, gcc_mock):
+        api = mock.MagicMock()
+        api._rootDesc = copy.deepcopy(get_rootDesc())
+        del api._rootDesc.get('resources')['jobs']['methods']['create']
+
+        # Make sure ArvCwlExecutor thinks it's running inside a container so it
+        # adds the logging handler that will call runtime_status_update() mock
+        self.assertFalse(gcc_mock.called)
+        runner = arvados_cwl.ArvCwlExecutor(api)
+        self.assertEqual(runner.work_api, 'containers')
+        root_logger = logging.getLogger('')
+        handlerClasses = [h.__class__ for h in root_logger.handlers]
+        self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses)
+        return runner
+
     # The test passes no builder.resources
     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
@@ -500,28 +517,30 @@ class TestContainer(unittest.TestCase):
         arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
         runner.add_intermediate_output.assert_called_with("zzzzz-4zz18-zzzzzzzzzzzzzz2")
 
+    # Test to make sure we don't call runtime_status_update if we already did
+    # somewhere higher up in the call stack
     @mock.patch("arvados_cwl.util.get_current_container")
-    @mock.patch("arvados.collection.CollectionReader")
-    @mock.patch("arvados.collection.Collection")
-    def test_child_failure(self, col, reader, gcc_mock):
-        api = mock.MagicMock()
-        api._rootDesc = copy.deepcopy(get_rootDesc())
-        del api._rootDesc.get('resources')['jobs']['methods']['create']
+    def test_recursive_runtime_status_update(self, gcc_mock):
+        self.setup_and_test_container_executor_and_logging(gcc_mock)
+        root_logger = logging.getLogger('')
 
-        # Set up runner with mocked runtime_status_update()
-        self.assertFalse(gcc_mock.called)
-        runtime_status_update = mock.MagicMock()
-        arvados_cwl.ArvCwlExecutor.runtime_status_update = runtime_status_update
-        runner = arvados_cwl.ArvCwlExecutor(api)
-        self.assertEqual(runner.work_api, 'containers')
+        # get_current_container is invoked when we call runtime_status_update,
+        # so try to log again!
+        gcc_mock.side_effect = lambda *args: root_logger.error("Second Error")
+        try:
+            root_logger.error("First Error")
+        except RuntimeError:
+            self.fail("RuntimeStatusLoggingHandler should not be called recursively")
 
-        # Make sure ArvCwlExecutor thinks it's running inside a container so it
-        # adds the logging handler that will call runtime_status_update() mock
+    @mock.patch("arvados_cwl.ArvCwlExecutor.runtime_status_update")
+    @mock.patch("arvados_cwl.util.get_current_container")
+    @mock.patch("arvados.collection.CollectionReader")
+    @mock.patch("arvados.collection.Collection")
+    def test_child_failure(self, col, reader, gcc_mock, rts_mock):
+        runner = self.setup_and_test_container_executor_and_logging(gcc_mock)
+
         gcc_mock.return_value = {"uuid" : "zzzzz-dz642-zzzzzzzzzzzzzzz"}
         self.assertTrue(gcc_mock.called)
-        root_logger = logging.getLogger('')
-        handlerClasses = [h.__class__ for h in root_logger.handlers]
-        self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses)
 
         runner.num_retries = 0
         runner.ignore_docker_for_reuse = False
@@ -565,7 +584,7 @@ class TestContainer(unittest.TestCase):
             "modified_at": "2017-05-26T12:01:22Z"
         })
 
-        runtime_status_update.assert_called_with(
+        rts_mock.assert_called_with(
             'error',
             'arvados.cwl-runner: [container testjob] (zzzzz-xvhdp-zzzzzzzzzzzzzzz) error log:',
             '  ** log is empty **'
index e596e669156f9014ce5b9ad66d05fc2ba561bb3a..ec2a9942a6794153ea69138ba467a20f8b1ae6a6 100644 (file)
@@ -230,12 +230,12 @@ def docker_link_sort_key(link):
     Docker metadata links to sort them from least to most preferred.
     """
     try:
-        image_timestamp = ciso8601.parse_datetime(
+        image_timestamp = ciso8601.parse_datetime_as_naive(
             link['properties']['image_timestamp'])
     except (KeyError, ValueError):
         image_timestamp = EARLIEST_DATETIME
     try:
-        created_timestamp = ciso8601.parse_datetime(link['created_at'])
+        created_timestamp = ciso8601.parse_datetime_as_naive(link['created_at'])
     except ValueError:
         created_timestamp = None
     return (image_timestamp, created_timestamp)
index acebe2b1b7eaaffdc642a93849be9d3ab03b2e02..2e7a2a8182b62e2ffe8aa474a75c1f35b2723a43 100644 (file)
@@ -12,7 +12,7 @@ def convertTime(t):
     if not t:
         return 0
     try:
-        return calendar.timegm(ciso8601.parse_datetime(t).timetuple())
+        return calendar.timegm(ciso8601.parse_datetime_as_naive(t).timetuple())
     except (TypeError, ValueError):
         return 0
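
Several hunks in this merge (arvcontainer.py, keepdocker.py, and fresh.py above) make the same substitution: ciso8601.parse_datetime() returns a timezone-aware datetime when the input string carries an offset (such as a trailing Z), and aware values cannot be compared with or subtracted from naive ones. parse_datetime_as_naive() discards the offset so the surrounding naive-datetime arithmetic keeps working. A quick sketch of the difference (assuming ciso8601 2.x, where parse_datetime_as_naive is available):

    import datetime
    import ciso8601

    aware = ciso8601.parse_datetime("2017-05-26T12:01:22Z")
    naive = ciso8601.parse_datetime_as_naive("2017-05-26T12:01:22Z")

    print(aware.tzinfo)  # a UTC tzinfo -- offset information is kept
    print(naive.tzinfo)  # None -- offset information is discarded

    naive + datetime.timedelta(0, 3600)  # fine: purely naive arithmetic
    # aware - datetime.datetime.utcnow() would raise TypeError:
    # can't subtract offset-naive and offset-aware datetimes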