[ARVADOS] updated: 407ac18e99051a67d4a615bfe2189d5562334f12

git at public.curoverse.com git at public.curoverse.com
Tue Apr 7 10:34:50 EDT 2015


Summary of changes:
 sdk/python/arvados/commands/arv_copy.py | 97 +++++++++++++++------------------
 1 file changed, 45 insertions(+), 52 deletions(-)

       via  407ac18e99051a67d4a615bfe2189d5562334f12 (commit)
       via  bfb34761f152b5cd5ec7f530aa354f2bd850ac4e (commit)
      from  92052dc0b2f80a16420483a756fcebedc6c6ec3c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 407ac18e99051a67d4a615bfe2189d5562334f12
Merge: 92052dc bfb3476
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Apr 7 10:34:31 2015 -0400

    Merge branch '5653-arv-copy-multiple-branches-wip'
    
    Closes #5653, #5673.


commit bfb34761f152b5cd5ec7f530aa354f2bd850ac4e
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Apr 6 14:27:21 2015 -0400

    5653: arv-copy copies multiple commits from the same repository+pipeline.
    
    arv-copy previously used the repository name alone to determine which
    job scripts it had already copied to the destination.  If a pipeline
    used unrelated commits from the same repository, it would skip copying
    over all but the first.  Track script versions throughout the copy
    process and make sure all of them are copied to the destination
    repository.

diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 6632cfd..54d9798 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -46,6 +46,9 @@ local_repo_dir = {}
 # destination collection UUIDs.
 collections_copied = {}
 
+# Set of (repository, script_version) two-tuples of commits copied in git.
+scripts_copied = set()
+
 # The owner_uuid of the object being copied
 src_owner_uuid = None
 
@@ -318,6 +321,31 @@ def copy_collections(obj, src, dst, args):
         return [copy_collections(v, src, dst, args) for v in obj]
     return obj
 
+def migrate_jobspec(jobspec, src, dst, dst_repo, args):
+    """Copy a job's script to the destination repository, and update its record.
+
+    Given a jobspec dictionary, this function finds the referenced script from
+    src and copies it to dst and dst_repo.  It also updates jobspec in place to
+    refer to names on the destination.
+    """
+    repo = jobspec.get('repository')
+    if repo is None:
+        return
+    # script_version is the "script_version" parameter from the source
+    # component or job.  If no script_version was supplied in the
+    # component or job, it is a mistake in the pipeline, but for the
+    # purposes of copying the repository, default to "master".
+    script_version = jobspec.get('script_version') or 'master'
+    script_key = (repo, script_version)
+    if script_key not in scripts_copied:
+        copy_git_repo(repo, src, dst, dst_repo, script_version, args)
+        scripts_copied.add(script_key)
+    jobspec['repository'] = dst_repo
+    repo_dir = local_repo_dir[repo]
+    for version_key in ['script_version', 'supplied_script_version']:
+        if version_key in jobspec:
+            jobspec[version_key] = git_rev_parse(jobspec[version_key], repo_dir)
+
 # copy_git_repos(p, src, dst, dst_repo, args)
 #
 #    Copies all git repositories referenced by pipeline instance or
@@ -336,33 +364,10 @@ def copy_collections(obj, src, dst, args):
 #    names.  The return value is undefined.
 #
 def copy_git_repos(p, src, dst, dst_repo, args):
-    copied = set()
-    for c in p['components']:
-        component = p['components'][c]
-        if 'repository' in component:
-            repo = component['repository']
-            script_version = component.get('script_version', None)
-            if repo not in copied:
-                copy_git_repo(repo, src, dst, dst_repo, script_version, args)
-                copied.add(repo)
-            component['repository'] = dst_repo
-            if script_version:
-                repo_dir = local_repo_dir[repo]
-                component['script_version'] = git_rev_parse(script_version, repo_dir)
+    for component in p['components'].itervalues():
+        migrate_jobspec(component, src, dst, dst_repo, args)
         if 'job' in component:
-            j = component['job']
-            if 'repository' in j:
-                repo = j['repository']
-                script_version = j.get('script_version', None)
-                if repo not in copied:
-                    copy_git_repo(repo, src, dst, dst_repo, script_version, args)
-                    copied.add(repo)
-                j['repository'] = dst_repo
-                repo_dir = local_repo_dir[repo]
-                if script_version:
-                    j['script_version'] = git_rev_parse(script_version, repo_dir)
-                if 'supplied_script_version' in j:
-                    j['supplied_script_version'] = git_rev_parse(j['supplied_script_version'], repo_dir)
+            migrate_jobspec(component['job'], src, dst, dst_repo, args)
 
 def total_collection_size(manifest_text):
     """Return the total number of bytes in this collection (excluding
@@ -577,8 +582,7 @@ def copy_collection(obj_uuid, src, dst, args):
 #    All commits will be copied to a destination branch named for the
 #    source repository URL.
 #
-#    Because users cannot create their own repositories, the
-#    destination repository must already exist.
+#    The destination repository must already exist.
 #
 #    The user running this command must be authenticated
 #    to both repositories.
@@ -601,34 +605,23 @@ def copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args):
     dst_git_push_url  = r['items'][0]['push_url']
     logger.debug('dst_git_push_url: {}'.format(dst_git_push_url))
 
-    # script_version is the "script_version" parameter from the source
-    # component or job.  It is used here to tie the destination branch
-    # to the commit that was used on the source.  If no script_version
-    # was supplied in the component or job, it is a mistake in the pipeline,
-    # but for the purposes of copying the repository, default to "master".
-    #
-    if not script_version:
-        script_version = "master"
-
     dst_branch = re.sub(r'\W+', '_', "{}_{}".format(src_git_url, script_version))
 
-    # Copy git commits from src repo to dst repo (but only if
-    # we have not already copied this repo in this session).
-    #
-    if src_git_repo in local_repo_dir:
-        logger.debug('already copied src repo %s, skipping', src_git_repo)
-    else:
-        tmprepo = tempfile.mkdtemp()
-        local_repo_dir[src_git_repo] = tmprepo
+    # Copy git commits from src repo to dst repo.
+    if src_git_repo not in local_repo_dir:
+        local_repo_dir[src_git_repo] = tempfile.mkdtemp()
         arvados.util.run_command(
-            ["git", "clone", "--bare", src_git_url, tmprepo],
-            cwd=os.path.dirname(tmprepo))
+            ["git", "clone", "--bare", src_git_url,
+             local_repo_dir[src_git_repo]],
+            cwd=os.path.dirname(local_repo_dir[src_git_repo]))
         arvados.util.run_command(
-            ["git", "branch", dst_branch, script_version],
-            cwd=tmprepo)
-        arvados.util.run_command(["git", "remote", "add", "dst", dst_git_push_url], cwd=tmprepo)
-        arvados.util.run_command(["git", "push", "dst", dst_branch], cwd=tmprepo)
-
+            ["git", "remote", "add", "dst", dst_git_push_url],
+            cwd=local_repo_dir[src_git_repo])
+    arvados.util.run_command(
+        ["git", "branch", dst_branch, script_version],
+        cwd=local_repo_dir[src_git_repo])
+    arvados.util.run_command(["git", "push", "dst", dst_branch],
+                             cwd=local_repo_dir[src_git_repo])
 
 def copy_docker_images(pipeline, src, dst, args):
     """Copy any docker images named in the pipeline components'

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list