[arvados] updated: 2.7.0-4966-g51ab4e57c3
git repository hosting
git@public.arvados.org
Tue Oct 10 17:50:30 UTC 2023
Summary of changes:
doc/user/topics/arv-copy.html.textile.liquid | 13 ++------
sdk/python/arvados/commands/arv_copy.py | 49 +++++++++++++++++-----------
sdk/python/arvados/http_to_keep.py | 9 ++++-
3 files changed, 41 insertions(+), 30 deletions(-)
via 51ab4e57c3e395bee108270e0b3620bffb0c5856 (commit)
from 73a20dc01eb18185bbccbbe3878b9fc56e4cbad8 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 51ab4e57c3e395bee108270e0b3620bffb0c5856
Author: Peter Amstutz <peter.amstutz@curii.com>
Date: Tue Oct 10 13:49:15 2023 -0400
20937: Improve error handling
Also clean up docs a bit
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid
index 7bb4375bae..86b68f2854 100644
--- a/doc/user/topics/arv-copy.html.textile.liquid
+++ b/doc/user/topics/arv-copy.html.textile.liquid
@@ -92,7 +92,7 @@ h3. How to copy a project
We will use the uuid @jutro-j7d0g-xj19djofle3aryq@ as an example project.
<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid pirca-j7d0g-lr8sq3tx3ovn68k jutro-j7d0g-xj19djofle3aryq
2021-09-08 21:29:32 arvados.arv-copy[6377] INFO:
2021-09-08 21:29:32 arvados.arv-copy[6377] INFO: Success: created copy with uuid pirca-j7d0g-ig9gvu5piznducp
</code></pre>
@@ -107,20 +107,13 @@ h3. Importing HTTP resources to Keep
You can also use @arv-copy@ to copy the contents of a HTTP URL into Keep. When you do this, Arvados keeps track of the original URL the resource came from. This allows you to refer to the resource by its original URL in Workflow inputs, but actually read from the local copy in Keep.
<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
+<pre><code>~$ <span class="userinput">~$ arv-copy --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
tordo-4zz18-dhpb6y9km2byb94
2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
</code></pre>
</notextile>
-In addition, if you provide a different cluster in @--src@, then @arv-copy@ will search the other cluster for a collection associated with that URL, and if found, copy from that collection instead of downloading from the original URL.
-
-<notextile>
-<pre><code>~$ <span class="userinput">peteramstutz@shell:~$ arv-copy --src pirca --project-uuid tordo-j7d0g-lr8sq3tx3ovn68k https://example.com/index.html
-tordo-4zz18-dhpb6y9km2byb94
-2023-10-06 10:15:36 arvados.arv-copy[374147] INFO: Success: created copy with uuid tordo-4zz18-dhpb6y9km2byb94
-</code></pre>
-</notextile>
+In addition, when importing from HTTP URLs, you may provide a different cluster than the destination in @--src@. This tells @arv-copy@ to search the other cluster for a collection associated with that URL, and if found, copy the collection from that cluster instead of downloading from the original URL.
The following @arv-copy@ command line options affect the behavior of HTTP import.
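
The @--src@ lookup described in the paragraph added above works because importing a URL records that URL on the resulting collection. A rough sketch of the lookup with the Python SDK (not the actual arv-copy code; it assumes ARVADOS_API_HOST/ARVADOS_API_TOKEN point at the source cluster and that the original URL is stored as a collection property key):

import arvados

url = "https://example.com/index.html"        # example URL from the docs above
src = arvados.api('v1')                        # client for the assumed source cluster

match = src.collections().list(
    filters=[["properties", "exists", url]],   # collections tagged with this URL
    limit=1).execute()

if match["items"]:
    print("found cached copy:", match["items"][0]["uuid"])
else:
    print("no cached copy here; arv-copy would download", url)
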
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index d9b8bd47dc..494f2e29b6 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -146,21 +146,29 @@ def main():
     # Identify the kind of object we have been given, and begin copying.
     t = uuid_type(src_arv, args.object_uuid)

-    if t == 'Collection':
-        set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
-        result = copy_collection(args.object_uuid,
-                                 src_arv, dst_arv,
-                                 args)
-    elif t == 'Workflow':
-        set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
-        result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
-    elif t == 'Group':
-        set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
-        result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
-    elif t == 'httpURL':
-        result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
-    else:
-        abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+
+    try:
+        if t == 'Collection':
+            set_src_owner_uuid(src_arv.collections(), args.object_uuid, args)
+            result = copy_collection(args.object_uuid,
+                                     src_arv, dst_arv,
+                                     args)
+        elif t == 'Workflow':
+            set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args)
+            result = copy_workflow(args.object_uuid, src_arv, dst_arv, args)
+        elif t == 'Group':
+            set_src_owner_uuid(src_arv.groups(), args.object_uuid, args)
+            result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args)
+        elif t == 'httpURL':
+            result = copy_from_http(args.object_uuid, src_arv, dst_arv, args)
+        else:
+            abort("cannot copy object {} of type {}".format(args.object_uuid, t))
+    except Exception as e:
+        if args.verbose:
+            logger.exception("%s", e)
+        else:
+            logger.error("%s", e)
+        exit(1)

     # Clean up any outstanding temp git repositories.
     for d in listvalues(local_repo_dir):
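
The try/except added in the hunk above changes what a failed copy looks like to the user: with --verbose the full traceback is logged via logger.exception(), otherwise only a single ERROR line is printed, and the command exits non-zero either way. A minimal, self-contained sketch of that pattern (run_copy and copy_fn are made-up names, not arv-copy functions):

import logging
import sys

logger = logging.getLogger('arvados.arv-copy')
logging.basicConfig(level=logging.INFO)

def run_copy(copy_fn, verbose=False):
    # Report failures the way the hunk above does: traceback only on request.
    try:
        return copy_fn()
    except Exception as e:
        if verbose:
            logger.exception("%s", e)   # one-line message plus traceback
        else:
            logger.error("%s", e)       # one-line message only
        sys.exit(1)

def copy_fn():
    raise RuntimeError("block copy failed")

run_copy(copy_fn, verbose=False)        # prints a single ERROR line and exits 1
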
@@ -570,7 +578,7 @@ def copy_collection(obj_uuid, src, dst, args):
     dst_keep = arvados.keep.KeepClient(api_client=dst, num_retries=args.retries)
     dst_manifest = io.StringIO()
     dst_locators = {}
-    bytes_written = [0]
+    bytes_written = 0
     bytes_expected = total_collection_size(manifest)
     if args.progress:
         progress_writer = ProgressWriter(human_progress)
@@ -625,12 +633,14 @@ def copy_collection(obj_uuid, src, dst, args):
                     # Drain the 'get' queue so we end early
                     while True:
                         get_queue.get(False)
+                        get_queue.task_done()
                 except queue.Empty:
                     pass
             finally:
                 get_queue.task_done()

     def put_thread():
+        nonlocal bytes_written
         while True:
             item = put_queue.get()
             if item is None:
@@ -651,15 +661,16 @@ def copy_collection(obj_uuid, src, dst, args):
                 dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
                 with lock:
                     dst_locators[blockhash] = dst_locator
-                    bytes_written[0] += loc.size
+                    bytes_written += loc.size
                     if progress_writer:
-                        progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+                        progress_writer.report(obj_uuid, bytes_written, bytes_expected)
             except e:
                 logger.error("Error putting block %s (%s bytes): %s", blockhash, loc.size, e)
                 try:
                     # Drain the 'get' queue so we end early
                     while True:
                         get_queue.get(False)
+                        get_queue.task_done()
                 except queue.Empty:
                     pass
                 transfer_error.append(e)
@@ -711,7 +722,7 @@ def copy_collection(obj_uuid, src, dst, args):
         dst_manifest.write("\n")

     if progress_writer:
-        progress_writer.report(obj_uuid, bytes_written[0], bytes_expected)
+        progress_writer.report(obj_uuid, bytes_written, bytes_expected)
         progress_writer.finish()

     # Copy the manifest and save the collection.
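
Two things change in the copy_collection hunks above: bytes_written goes from a one-element list to a plain integer that the nested put_thread rebinds through nonlocal, and the queue-draining loops now call get_queue.task_done() for every item they pull off, since queue.Queue.join() only returns once each get() has been matched by a task_done(). A toy illustration of the nonlocal counter (not arv-copy itself):

import threading

def copy_blocks(block_sizes):
    bytes_written = 0
    lock = threading.Lock()

    def put_thread(size):
        nonlocal bytes_written        # rebind the enclosing variable; without
        with lock:                    # nonlocal this would need the old
            bytes_written += size     # one-element-list trick, bytes_written[0]

    threads = [threading.Thread(target=put_thread, args=(s,)) for s in block_sizes]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return bytes_written

print(copy_blocks([64, 128, 256]))    # -> 448
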
diff --git a/sdk/python/arvados/http_to_keep.py b/sdk/python/arvados/http_to_keep.py
index b37ab59109..1da8cf4946 100644
--- a/sdk/python/arvados/http_to_keep.py
+++ b/sdk/python/arvados/http_to_keep.py
@@ -182,6 +182,10 @@ class _Downloader(PyCurlHelper):
         mt = re.match(r'^HTTP\/(\d(\.\d)?) ([1-5]\d\d) ([^\r\n\x00-\x08\x0b\x0c\x0e-\x1f\x7f]*)\r\n$', self._headers["x-status-line"])
         code = int(mt.group(3))

+        if not self.name:
+            logger.error("Cannot determine filename from URL or headers")
+            return
+
         if code == 200:
             self.target = self.collection.open(self.name, "wb")
@@ -193,7 +197,10 @@ class _Downloader(PyCurlHelper):
         self.count += len(chunk)

         if self.target is None:
-            return
+            # "If this number is not equal to the size of the byte
+            # string, this signifies an error and libcurl will abort
+            # the request."
+            return 0

         self.target.write(chunk)
         loopnow = time.time()
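
The return 0 added above relies on libcurl's write-callback contract: a callback that returns a byte count different from the length of the chunk it was handed makes libcurl abort the transfer, which is how the downloader stops cleanly when it has no open target. A standalone pycurl sketch of that contract (the URL is the example one from the docs above; the 1 MiB cap is arbitrary):

import sys
import pycurl

received = 0

def write_body(chunk):
    global received
    received += len(chunk)
    if received > 1024 * 1024:        # arbitrary cap, just for the sketch
        return 0                      # != len(chunk): libcurl aborts the request
    sys.stdout.buffer.write(chunk)    # returning None means "chunk consumed"

curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://example.com/index.html")
curl.setopt(pycurl.WRITEFUNCTION, write_body)
try:
    curl.perform()                    # raises pycurl.error once write_body returns 0
except pycurl.error as err:
    print("transfer aborted:", err, file=sys.stderr)
finally:
    curl.close()
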
-----------------------------------------------------------------------
hooks/post-receive