We will use the uuid @jutro-j7d0g-xj19djofle3aryq@ as an example project.
<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
2021-09-08 21:29:32 arvados.arv-copy[6377] INFO:
2021-09-08 21:29:32 arvados.arv-copy[6377] INFO: Success: created copy with uuid pirca-j7d0g-ig9gvu5piznducp
</code></pre>
You can also use @arv-copy@ to copy the contents of an HTTP URL into Keep. When you do this, Arvados keeps track of the original URL the resource came from. This allows you to refer to the resource by its original URL in Workflow inputs, but actually read from the local copy in Keep.
<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
tordo-4zz18-dhpb6y9km2byb94
2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
</code></pre>
</notextile>
-In addition, if you provide a different cluster in @--src@, then @arv-copy@ will search the other cluster for a collection associated with that URL, and if found, copy from that collection instead of downloading from the original URL.
-
-<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --src pirca --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
-tordo-4zz18-dhpb6y9km2byb94
-2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
-</code></pre>
-</notextile>
+In addition, when importing from HTTP URLs, you may provide a different cluster than the destination in @--src@. This tells @arv-copy@ to search the other cluster for a collection associated with that URL, and if found, copy the collection from that cluster instead of downloading from the original URL.
The following @arv-copy@ command-line options affect the behavior of HTTP import.
# Identify the kind of object we have been given, and begin copying.
t = uuid_type(src_arv, args.object_uuid)
- if t == 'Collection':
- set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
- result = copy_collection(args.object_uuid,
- src_arv, dst_arv,
- args)
- elif t == 'Workflow':
- set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
- result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
- elif t == 'Group':
- set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
- result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
- elif t == 'httpURL':
- result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
- else:
- abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+
+ try:
+ if t == 'Collection':
+ set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
+ result = copy_collection(args.object_uuid,
+ src_arv, dst_arv,
+ args)
+ elif t == 'Workflow':
+ set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
+ result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
+ elif t == 'Group':
+ set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
+ result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
+ elif t == 'httpURL':
+ result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
+ else:
+ abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+ except Exception as e:
+ if args.verbose:
+ logger.exception("%s", e)
+ else:
+ logger.error("%s", e)
+ exit(1)
# Clean up any outstanding temp git repositories.
for d in listvalues(local_repo_dir):
dst_keep = arvados.keep.KeepClient(api_client=dst, num_retries=args.retries)
dst_manifest = io.StringIO()
dst_locators = {}
- bytes_written = [0]
+ bytes_written = 0
bytes_expected = total_collection_size(manifest)
if args.progress:
progress_writer = ProgressWriter(human_progress)
# Drain the 'get' queue so we end early
while True:
get_queue.get(False)
+ get_queue.task_done()
except queue.Empty:
pass
finally:
get_queue.task_done()
def put_thread():
+ nonlocal bytes_written
while True:
item = put_queue.get()
if item is None:
dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
with lock:
dst_locators[blockhash] = dst_locator
- bytes_written[0] += loc.size
+ bytes_written += loc.size
if progress_writer:
- progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+ progress_writer.report(obj_uuid, bytes_written, bytes_expected)
except e:
logger.error("Error putting block %s (%s bytes): %s", blockhash, loc.size, e)
try:
# Drain the 'get' queue so we end early
while True:
get_queue.get(False)
+ get_queue.task_done()
except queue.Empty:
pass
transfer_error.append(e)
dst_manifest.write("\n")
if progress_writer:
- progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+ progress_writer.report(obj_uuid, bytes_written, bytes_expected)
progress_writer.finish()
# Copy the manifest and save the collection.