[ARVADOS] updated: cdaf5c71016d2cad54d54e9b4b87bafe4554d376
git at public.curoverse.com
git at public.curoverse.com
Mon Oct 20 14:18:48 EDT 2014
Summary of changes:
sdk/python/arvados/commands/copy.py | 62 +++++++++++++++++++++++++------------
1 file changed, 42 insertions(+), 20 deletions(-)
via cdaf5c71016d2cad54d54e9b4b87bafe4554d376 (commit)
from b81c434401a503746ec54e53bf7058cf42beaa2f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit cdaf5c71016d2cad54d54e9b4b87bafe4554d376
Author: Tim Pierce <twp at curoverse.com>
Date: Mon Oct 20 14:13:37 2014 -0400
3699: collection copying bug fixes
From code review #3699-35:
* Updated help text
* Find collection UUIDs and hashes throughout script_parameters even if
buried in command arguments
* Use logger consistently in preference to "print >>sys.stderr"
* isinstance(obj, basestr)
* Fix ensure_unique_name handling
* copy_collection now correctly handles manifests without a trailing
newline
diff --git a/sdk/python/arvados/commands/copy.py b/sdk/python/arvados/commands/copy.py
index 5880c8c..2d2b32a 100755
--- a/sdk/python/arvados/commands/copy.py
+++ b/sdk/python/arvados/commands/copy.py
@@ -40,9 +40,13 @@ logger = logging.getLogger('arvados.arv-copy')
#
local_repo_dir = {}
+# List of collections that have been copied in this session, and their
+# destination collection UUIDs.
+collections_copied = {}
+
def main():
parser = argparse.ArgumentParser(
- description='Copy a pipeline instance from one Arvados instance to another.')
+ description='Copy a pipeline instance, template or collection from one Arvados instance to another.')
parser.add_argument(
'-v', '--verbose', dest='verbose', action='store_true',
@@ -203,8 +207,8 @@ def copy_pipeline_instance(pi_uuid, src, dst, args):
else:
# not recursive
- print >>sys.stderr, "Copying only pipeline instance {}.".format(pi_uuid)
- print >>sys.stderr, "You are responsible for making sure all pipeline dependencies have been updated."
+ logger.info("Copying only pipeline instance %s.", pi_uuid)
+ logger.info("You are responsible for making sure all pipeline dependencies have been updated.")
# Update the pipeline instance properties, and create the new
# instance at dst.
@@ -216,9 +220,8 @@ def copy_pipeline_instance(pi_uuid, src, dst, args):
else:
del pi['owner_uuid']
del pi['uuid']
- pi['ensure_unique_name'] = True
- new_pi = dst.pipeline_instances().create(body=pi).execute()
+ new_pi = dst.pipeline_instances().create(body=pi, ensure_unique_name=True).execute()
return new_pi
# copy_pipeline_template(pt_uuid, src, dst, args)
@@ -247,26 +250,45 @@ def copy_pipeline_template(pt_uuid, src, dst, args):
pt['description'] = "Pipeline template copied from {}\n\n{}".format(
pt_uuid, pt.get('description', ''))
pt['name'] = "{} copied from {}".format(pt.get('name', ''), pt_uuid)
- pt['ensure_unique_name'] = True
del pt['uuid']
del pt['owner_uuid']
- return dst.pipeline_templates().create(body=pt).execute()
+ return dst.pipeline_templates().create(body=pt, ensure_unique_name=True).execute()
# copy_collections(obj, src, dst, args)
#
# Recursively copies all collections referenced by 'obj' from src
-# to dst.
+# to dst. obj may be a dict or a list, in which case we run
+# copy_collections on every value it contains. If it is a string,
+# search it for any substring that matches a collection hash or uuid
+# (this will find hidden references to collections like
+# "input0": "$(file 3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq)")
#
# Returns a copy of obj with any old collection uuids replaced by
# the new ones.
#
def copy_collections(obj, src, dst, args):
- if type(obj) in [str, unicode]:
- if uuid_type(src, obj) == 'Collection':
- newc = copy_collection(obj, src, dst, args)
- if obj != newc['uuid'] and obj != newc['portable_data_hash']:
- return newc['uuid']
+
+ def copy_collection_fn(src_id):
+ """Helper function for regex substitution: copies a single collection
+ identified by 'src_id' to the destination. Returns the
+ destination collection uuid (or the portable data hash if
+ that's what src_id is).
+
+ """
+ if src_id not in collections_copied:
+ dst_col = copy_collection(src_id, src, dst, args)
+ if src_id in [dst_col['uuid'], dst_col['portable_data_hash']]:
+ collections_copied[src_id] = src_id
+ else:
+ collections_copied[src_id] = dst_col['uuid']
+ return collections_copied[src_id]
+
+ if isinstance(obj, basestring):
+ # Copy any collections identified in this string to dst, replacing
+ # them with the dst uuids as necessary.
+ obj = arvados.util.portable_data_hash_pattern.sub(copy_collection_fn, obj)
+ obj = arvados.util.collection_uuid_pattern.sub(copy_collection_fn, obj)
return obj
elif type(obj) == dict:
return {v: copy_collections(obj[v], src, dst, args) for v in obj}
@@ -379,10 +401,9 @@ def copy_collection(obj_uuid, src, dst, args):
logger.debug("Skipping collection %s (already at dst)", obj_uuid)
return dstcol['items'][0]
- logger.debug("Copying collection %s", obj_uuid)
-
# Fetch the collection's manifest.
manifest = c['manifest_text']
+ logger.debug("Copying collection %s with manifest: <%s>", obj_uuid, manifest)
# Copy each block from src_keep to dst_keep.
# Use the newly signed locators returned from dst_keep to build
@@ -420,22 +441,23 @@ def copy_collection(obj_uuid, src, dst, args):
# If 'word' can't be parsed as a locator,
# presume it's a filename.
dst_manifest_line += ' ' + word
- dst_manifest += dst_manifest_line + "\n"
+ dst_manifest += dst_manifest_line
+ if line.endswith("\n"):
+ dst_manifest += "\n"
if progress_writer:
progress_writer.finish()
# Copy the manifest and save the collection.
- logger.debug('saving {} manifest: {}'.format(obj_uuid, dst_manifest))
+ logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest)
dst_keep.put(dst_manifest)
if 'uuid' in c:
del c['uuid']
if 'owner_uuid' in c:
del c['owner_uuid']
- c['ensure_unique_name'] = True
c['manifest_text'] = dst_manifest
- return dst.collections().create(body=c).execute()
+ return dst.collections().create(body=c, ensure_unique_name=True).execute()
# copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version)
#
@@ -533,7 +555,7 @@ def uuid_type(api, object_uuid):
return None
def abort(msg, code=1):
- print >>sys.stderr, "arv-copy:", msg
+ logger.info("arv-copy:", msg)
exit(code)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list