20937: Improve error handling

author Peter Amstutz <peter.amstutz@curii.com>

Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)

committer Peter Amstutz <peter.amstutz@curii.com>

Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)
author Peter Amstutz <peter.amstutz@curii.com>
Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)
diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid

index 7bb4375bae85f610688476162a2f42ab0ebdaa71..86b68f2854a4fee7eb73e0006d2dd3516521fe6e 100644 (file)
--- a/doc/user/topics/arv-copy.html.textile.liquid
+++ b/doc/user/topics/arv-copy.html.textile.liquid
@@ -92,7 +92,7 @@ h3. How to copy a project
  We will use the uuid @jutro-j7d0g-xj19djofle3aryq@ as an example project.
  
  <notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
  2021-09-08 21:29:32 arvados.arv-copy[6377] INFO:
  2021-09-08 21:29:32 arvados.arv-copy[6377] INFO: Success: created copy with uuid pirca-j7d0g-ig9gvu5piznducp
  </code></pre>
@@ -107,20 +107,13 @@ h3. Importing HTTP resources to Keep
  You can also use @arv-copy@ to copy the contents of a HTTP URL into Keep.  When you do this, Arvados keeps track of the original URL the resource came from.  This allows you to refer to the resource by its original URL in Workflow inputs, but actually read from the local copy in Keep.
  
  <notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
  tordo-4zz18-dhpb6y9km2byb94
  2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
  </code></pre>
  </notextile>
  
-In addition, if you provide a different cluster in @--src@, then @arv-copy@ will search the other cluster for a collection associated with that URL, and if found, copy from that collection instead of downloading from the original URL.
-
-<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --src pirca --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
-tordo-4zz18-dhpb6y9km2byb94
-2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
-</code></pre>
-</notextile>
+In addition, when importing from HTTP URLs, you may provide a different cluster than the destination in @--src@. This tells @arv-copy@ to search the other cluster for a collection associated with that URL, and if found, copy the collection from that cluster instead of downloading from the original URL.
  
  The following @arv-copy@ command line options affect the behavior of HTTP import.
  
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py

index d9b8bd47dc490a16f315fdf5d830d75e1cfc5007..494f2e29b6b574b21df2a113f729bfc647b95210 100755 (executable)
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -146,21 +146,29 @@ def main():
  
      # Identify the kind of object we have been given, and begin copying.
      t = uuid_type(src_arv, args.object_uuid)
-    if t == 'Collection':
-        set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
-        result = copy_collection(args.object_uuid,
-                                 src_arv, dst_arv,
-                                 args)
-    elif t == 'Workflow':
-        set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
-        result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
-    elif t == 'Group':
-        set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
-        result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
-    elif t == 'httpURL':
-        result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
-    else:
-        abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+
+    try:
+        if t == 'Collection':
+            set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
+            result = copy_collection(args.object_uuid,
+                                     src_arv, dst_arv,
+                                     args)
+        elif t == 'Workflow':
+            set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
+            result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
+        elif t == 'Group':
+            set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
+            result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
+        elif t == 'httpURL':
+            result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
+        else:
+            abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+    except Exception as e:
+        if args.verbose:
+            logger.exception("%s", e)
+        else:
+            logger.error("%s", e)
+        exit(1)
  
      # Clean up any outstanding temp git repositories.
      for d in listvalues(local_repo_dir):
@@ -570,7 +578,7 @@ def copy_collection(obj_uuid, src, dst, args):
      dst_keep = arvados.keep.KeepClient(api_client=dst, num_retries=args.retries)
      dst_manifest = io.StringIO()
      dst_locators = {}
-    bytes_written = [0]
+    bytes_written = 0
      bytes_expected = total_collection_size(manifest)
      if args.progress:
          progress_writer = ProgressWriter(human_progress)
@@ -625,12 +633,14 @@ def copy_collection(obj_uuid, src, dst, args):
                      # Drain the 'get' queue so we end early
                      while True:
                          get_queue.get(False)
+                        get_queue.task_done()
                  except queue.Empty:
                      pass
              finally:
                  get_queue.task_done()
  
      def put_thread():
+        nonlocal bytes_written
          while True:
              item = put_queue.get()
              if item is None:
@@ -651,15 +661,16 @@ def copy_collection(obj_uuid, src, dst, args):
                  dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
                  with lock:
                      dst_locators[blockhash] = dst_locator
-                    bytes_written[0] += loc.size
+                    bytes_written += loc.size
                      if progress_writer:
-                        progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+                        progress_writer.report(obj_uuid, bytes_written, bytes_expected)
              except e:
                  logger.error("Error putting block %s (%s bytes): %s", blockhash, loc.size, e)
                  try:
                      # Drain the 'get' queue so we end early
                      while True:
                          get_queue.get(False)
+                        get_queue.task_done()
                  except queue.Empty:
                      pass
                  transfer_error.append(e)
@@ -711,7 +722,7 @@ def copy_collection(obj_uuid, src, dst, args):
          dst_manifest.write("\n")
  
      if progress_writer:
-        progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+        progress_writer.report(obj_uuid, bytes_written, bytes_expected)
          progress_writer.finish()
  
      # Copy the manifest and save the collection.
diff --git a/sdk/python/arvados/http_to_keep.py b/sdk/python/arvados/http_to_keep.py

index b37ab5910914b67454038de18682b1bc41b5863f..1da8cf4946c652bfca3208ca632821144ccab1f5 100644 (file)
--- a/sdk/python/arvados/http_to_keep.py
+++ b/sdk/python/arvados/http_to_keep.py
@@ -182,6 +182,10 @@ class _Downloader(PyCurlHelper):
          mt = re.match(r'^HTTP\/(\d(\.\d)?) ([1-5]\d\d) ([^\r\n\x00-\x08\x0b\x0c\x0e-\x1f\x7f]*)\r\n$', self._headers["x-status-line"])
          code = int(mt.group(3))
  
+        if not self.name:
+            logger.error("Cannot determine filename from URL or headers")
+            return
+
          if code == 200:
              self.target = self.collection.open(self.name, "wb")
  
@@ -193,7 +197,10 @@ class _Downloader(PyCurlHelper):
          self.count += len(chunk)
  
          if self.target is None:
-            return
+            # "If this number is not equal to the size of the byte
+            # string, this signifies an error and libcurl will abort
+            # the request."
+            return 0
  
          self.target.write(chunk)
          loopnow = time.time()
author	Peter Amstutz <peter.amstutz@curii.com>
	Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)
committer	Peter Amstutz <peter.amstutz@curii.com>
	Tue, 10 Oct 2023 17:49:15 +0000 (13:49 -0400)
doc/user/topics/arv-copy.html.textile.liquid		patch \| blob \| history
sdk/python/arvados/commands/arv_copy.py		patch \| blob \| history
sdk/python/arvados/http_to_keep.py		patch \| blob \| history