[ARVADOS] updated: b251bf4a306e641aa4ab6e2c42469280c3285059

git at public.curoverse.com
Thu Jun 12 15:45:14 EDT 2014


Summary of changes:
 sdk/cli/bin/arv                           |   5 +-
 sdk/python/arvados/commands/_util.py      |  32 ++++
 sdk/python/arvados/commands/keepdocker.py | 219 +++++++++++++++++++++++++
 sdk/python/arvados/commands/put.py        | 255 ++++++++++++++++--------------
 sdk/python/arvados/keep.py                |  13 +-
 sdk/python/arvados/util.py                |  24 +++
 sdk/python/bin/arv-keepdocker             |   4 +
 sdk/python/setup.py                       |   3 +-
 sdk/python/tests/test_arv_put.py          |   5 +-
 9 files changed, 424 insertions(+), 136 deletions(-)
 create mode 100644 sdk/python/arvados/commands/_util.py
 create mode 100644 sdk/python/arvados/commands/keepdocker.py
 create mode 100755 sdk/python/bin/arv-keepdocker

       via  b251bf4a306e641aa4ab6e2c42469280c3285059 (commit)
       via  95bb3f7e95b4f4383f201e5be235ac796486d1d0 (commit)
       via  bda86a5d49b1306e0a56c3d1ad71333ab2767e4a (commit)
       via  9047de800ce3288848b1638bd2060450e9afd034 (commit)
       via  728fbdbae7d8e926f64a09d3f20aad6bdb67435e (commit)
       via  af6ef4aefe653ac5fd7bbd028af580ddfeb8f4d9 (commit)
       via  e0b4f004501f54a1c3824ae0974f2b4619800eed (commit)
       via  c7f445954df35959174761ba7b1f44ecf377c87a (commit)
       via  75483667d22a4c40ab238a13cb42d565304e05c5 (commit)
      from  4d84c7d2c66ea255db24e2b4159bcafed29ef00d (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list them in
full below.


commit b251bf4a306e641aa4ab6e2c42469280c3285059
Merge: 4d84c7d 95bb3f7
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Jun 12 15:45:36 2014 -0400

    Merge branch '2879-docker-image-installer'
    
    Refs #2879.  Closes #2998, #3006.


commit 95bb3f7e95b4f4383f201e5be235ac796486d1d0
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Jun 12 15:36:18 2014 -0400

    2879: arv-keepdocker reports a better error for ambiguous hashes.
    
    Refs #2879.
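
For context, a minimal caller sketch of the new helper introduced in this commit (the image name and tag below are hypothetical):

    import sys
    from arvados.commands.keepdocker import DockerError, find_one_image_hash

    try:
        # Resolve a repository/tag pair or a hash prefix to one full image hash.
        image_hash = find_one_image_hash('debian', 'latest')
    except DockerError as error:
        # Zero matches and ambiguous hash prefixes both raise DockerError.
        print >>sys.stderr, "example:", error.message
        sys.exit(1)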

diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index c04885c..abf60f2 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -82,23 +82,31 @@ def docker_images():
     list_proc.stdout.close()
     check_docker(list_proc, "images")
 
-def find_image_hash(image_search, image_tag=None):
-    # Given one argument, search for one Docker image with a matching hash,
-    # and return its full hash.
-    # Given two arguments, also search for a Docker image with the same
-    # repository and tag.  If one is found, return its hash; otherwise,
-    # fall back to the one-argument hash search.
+def find_image_hashes(image_search, image_tag=None):
+    # Given one argument, search for Docker images with matching hashes,
+    # and return their full hashes in a set.
+    # Given two arguments, also search for a Docker image with the
+    # same repository and tag.  If one is found, return its hash in a
+    # set; otherwise, fall back to the one-argument hash search.
     # Returns None if no match is found, or a hash search is ambiguous.
     hash_search = image_search.lower()
     hash_matches = set()
     for image in docker_images():
         if (image.repo == image_search) and (image.tag == image_tag):
-            return image.hash
+            return set([image.hash])
         elif image.hash.startswith(hash_search):
             hash_matches.add(image.hash)
-    if len(hash_matches) == 1:
-        return hash_matches.pop()
-    return None
+    return hash_matches
+
+def find_one_image_hash(image_search, image_tag=None):
+    hashes = find_image_hashes(image_search, image_tag)
+    hash_count = len(hashes)
+    if hash_count == 1:
+        return hashes.pop()
+    elif hash_count == 0:
+        raise DockerError("no matching image found")
+    else:
+        raise DockerError("{} images match {}".format(hash_count, image_search))
 
 def stat_cache_name(image_file):
     return getattr(image_file, 'name', image_file) + '.stat'
@@ -149,14 +157,15 @@ def main(arguments=None):
 
     # Pull the image if requested, unless the image is specified as a hash
     # that we already have.
-    if args.pull and (find_image_hash(args.image) is None):
+    if args.pull and not find_image_hashes(args.image):
         pull_image(args.image, args.tag)
 
-    image_hash = find_image_hash(args.image, args.tag)
-    if image_hash is None:
-        print >>sys.stderr, "arv-keepdocker: No image found."
+    try:
+        image_hash = find_one_image_hash(args.image, args.tag)
+    except DockerError as error:
+        print >>sys.stderr, "arv-keepdocker:", error.message
         sys.exit(1)
-    elif not args.force:
+    if not args.force:
         # Abort if this image is already in Arvados.
         existing_links = arvados.api('v1').links().list(
             filters=[['link_class', '=', 'docker_image_hash'],

commit bda86a5d49b1306e0a56c3d1ad71333ab2767e4a
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Jun 12 14:51:23 2014 -0400

    2879: Clarify Docker image search behavior in Keep installer.
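
For reference, the two call styles the new comment documents (the values are hypothetical):

    from arvados.commands.keepdocker import find_image_hash

    find_image_hash('debian', 'latest')  # repository/tag search, falling back to a hash search
    find_image_hash('3132f19c1ea8')      # one-argument form: hash-prefix search only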

diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 318f730..c04885c 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -82,11 +82,17 @@ def docker_images():
     list_proc.stdout.close()
     check_docker(list_proc, "images")
 
-def find_image_hash(image_name, image_tag):
-    hash_search = image_name.lower()
+def find_image_hash(image_search, image_tag=None):
+    # Given one argument, search for one Docker image with a matching hash,
+    # and return its full hash.
+    # Given two arguments, also search for a Docker image with the same
+    # repository and tag.  If one is found, return its hash; otherwise,
+    # fall back to the one-argument hash search.
+    # Returns None if no match is found, or a hash search is ambiguous.
+    hash_search = image_search.lower()
     hash_matches = set()
     for image in docker_images():
-        if (image.repo == image_name) and (image.tag == image_tag):
+        if (image.repo == image_search) and (image.tag == image_tag):
             return image.hash
         elif image.hash.startswith(hash_search):
             hash_matches.add(image.hash)
@@ -143,7 +149,7 @@ def main(arguments=None):
 
     # Pull the image if requested, unless the image is specified as a hash
     # that we already have.
-    if args.pull and (find_image_hash(args.image, None) is None):
+    if args.pull and (find_image_hash(args.image) is None):
         pull_image(args.image, args.tag)
 
     image_hash = find_image_hash(args.image, args.tag)

commit 9047de800ce3288848b1638bd2060450e9afd034
Author: Brett Smith <brett at curoverse.com>
Date:   Thu Jun 12 14:44:55 2014 -0400

    2879: Docker Keep installer returns 0 if image already installed.
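
One practical effect, sketched (assumes arv-keepdocker is on $PATH; the image name is hypothetical):

    import subprocess
    # Re-running the installer on an already-uploaded image now exits 0,
    # so callers like check_call treat the no-op as success instead of raising.
    subprocess.check_call(['arv-keepdocker', '--no-pull', 'debian', 'latest'])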

diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 0c4930e..318f730 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -161,7 +161,7 @@ def main(arguments=None):
                 format(image_hash)]
             message.extend(link['head_uuid'] for link in existing_links)
             print >>sys.stderr, "\n".join(message)
-            sys.exit(1)
+            sys.exit(0)
 
     # Open a file for the saved image, and write it if needed.
     outfile_name = '{}.tar'.format(image_hash)

commit 728fbdbae7d8e926f64a09d3f20aad6bdb67435e
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Jun 10 16:34:01 2014 -0400

    2879: Add arv-keepdocker command.
    
    This puts a Docker image in Keep, and makes Arvados links to help find
    the Collection by the image's names.
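
A minimal usage sketch (the image name is hypothetical; assumes Docker and Arvados credentials are configured):

    from arvados.commands.keepdocker import main
    # Saves the image to Keep via arv-put and creates docker_image_hash,
    # docker_image_repository, and docker_image_tag links pointing at it.
    main(['--no-pull', 'debian', 'latest'])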

diff --git a/sdk/cli/bin/arv b/sdk/cli/bin/arv
index 31cbeec..b485b7b 100755
--- a/sdk/cli/bin/arv
+++ b/sdk/cli/bin/arv
@@ -42,13 +42,16 @@ when 'keep'
   elsif ['less', 'check'].index @sub then
     # wh* shims
     exec `which wh#{@sub}`.strip, *ARGV
+  elsif @sub == 'docker'
+    exec `which arv-keepdocker`.strip, *ARGV
   else
     puts "Usage: \n" +
       "#{$0} keep ls\n" +
       "#{$0} keep get\n" +
       "#{$0} keep put\n" +
       "#{$0} keep less\n" +
-      "#{$0} keep check\n"
+      "#{$0} keep check\n" +
+      "#{$0} keep docker\n"
   end
   abort
 when 'pipeline'
diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
new file mode 100644
index 0000000..0c4930e
--- /dev/null
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+
+import argparse
+import errno
+import json
+import os
+import subprocess
+import sys
+import tarfile
+import tempfile
+
+from collections import namedtuple
+from stat import *
+
+import arvados
+import arvados.commands._util as arv_cmd
+import arvados.commands.put as arv_put
+
+STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
+
+DockerImage = namedtuple('DockerImage',
+                         ['repo', 'tag', 'hash', 'created', 'vsize'])
+
+opt_parser = argparse.ArgumentParser(add_help=False)
+opt_parser.add_argument(
+    '-f', '--force', action='store_true', default=False,
+    help="Re-upload the image even if it already exists on the server")
+
+_group = opt_parser.add_mutually_exclusive_group()
+_group.add_argument(
+    '--pull', action='store_true', default=True,
+    help="Pull the latest image from Docker repositories first (default)")
+_group.add_argument(
+    '--no-pull', action='store_false', dest='pull',
+    help="Don't pull images from Docker repositories")
+
+opt_parser.add_argument(
+    'image',
+    help="Docker image to upload, as a repository name or hash")
+opt_parser.add_argument(
+    'tag', nargs='?', default='latest',
+    help="Tag of the Docker image to upload (default 'latest')")
+
+arg_parser = argparse.ArgumentParser(
+        description="Upload a Docker image to Arvados",
+        parents=[opt_parser, arv_put.run_opts])
+
+class DockerError(Exception):
+    pass
+
+
+def popen_docker(cmd, *args, **kwargs):
+    manage_stdin = ('stdin' not in kwargs)
+    kwargs.setdefault('stdin', subprocess.PIPE)
+    kwargs.setdefault('stdout', sys.stderr)
+    try:
+        docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
+    except OSError:  # No docker.io in $PATH
+        docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
+    if manage_stdin:
+        docker_proc.stdin.close()
+    return docker_proc
+
+def check_docker(proc, description):
+    proc.wait()
+    if proc.returncode != 0:
+        raise DockerError("docker {} returned status code {}".
+                          format(description, proc.returncode))
+
+def docker_images():
+    # Yield a DockerImage tuple for each installed image.
+    list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
+    list_output = iter(list_proc.stdout)
+    next(list_output)  # Ignore the header line
+    for line in list_output:
+        words = line.split()
+        size_index = len(words) - 2
+        repo, tag, imageid = words[:3]
+        ctime = ' '.join(words[3:size_index])
+        vsize = ' '.join(words[size_index:])
+        yield DockerImage(repo, tag, imageid, ctime, vsize)
+    list_proc.stdout.close()
+    check_docker(list_proc, "images")
+
+def find_image_hash(image_name, image_tag):
+    hash_search = image_name.lower()
+    hash_matches = set()
+    for image in docker_images():
+        if (image.repo == image_name) and (image.tag == image_tag):
+            return image.hash
+        elif image.hash.startswith(hash_search):
+            hash_matches.add(image.hash)
+    if len(hash_matches) == 1:
+        return hash_matches.pop()
+    return None
+
+def stat_cache_name(image_file):
+    return getattr(image_file, 'name', image_file) + '.stat'
+
+def pull_image(image_name, image_tag):
+    check_docker(popen_docker(['pull', '-t', image_tag, image_name]), "pull")
+
+def save_image(image_hash, image_file):
+    # Save the specified Docker image to image_file, then try to save its
+    # stats so we can try to resume after interruption.
+    check_docker(popen_docker(['save', image_hash], stdout=image_file),
+                 "save")
+    image_file.flush()
+    try:
+        with open(stat_cache_name(image_file), 'w') as statfile:
+            json.dump(tuple(os.fstat(image_file.fileno())), statfile)
+    except STAT_CACHE_ERRORS:
+        pass  # We won't resume from this cache.  No big deal.
+
+def prep_image_file(filename):
+    # Return a file object ready to save a Docker image,
+    # and a boolean indicating whether or not we need to actually save the
+    # image (False if a cached save is available).
+    cache_dir = arv_cmd.make_home_conf_dir(
+        os.path.join('.cache', 'arvados', 'docker'), 0o700)
+    if cache_dir is None:
+        image_file = tempfile.NamedTemporaryFile(suffix='.tar')
+        need_save = True
+    else:
+        file_path = os.path.join(cache_dir, filename)
+        try:
+            with open(stat_cache_name(file_path)) as statfile:
+                prev_stat = json.load(statfile)
+            now_stat = os.stat(file_path)
+            need_save = any(prev_stat[field] != now_stat[field]
+                            for field in [ST_MTIME, ST_SIZE])
+        except STAT_CACHE_ERRORS + (AttributeError, IndexError):
+            need_save = True  # We couldn't compare against old stats
+        image_file = open(file_path, 'w+b' if need_save else 'rb')
+    return image_file, need_save
+
+def make_link(link_class, link_name, **link_attrs):
+    link_attrs.update({'link_class': link_class, 'name': link_name})
+    return arvados.api('v1').links().create(body=link_attrs).execute()
+
+def main(arguments=None):
+    args = arg_parser.parse_args(arguments)
+
+    # Pull the image if requested, unless the image is specified as a hash
+    # that we already have.
+    if args.pull and (find_image_hash(args.image, None) is None):
+        pull_image(args.image, args.tag)
+
+    image_hash = find_image_hash(args.image, args.tag)
+    if image_hash is None:
+        print >>sys.stderr, "arv-keepdocker: No image found."
+        sys.exit(1)
+    elif not args.force:
+        # Abort if this image is already in Arvados.
+        existing_links = arvados.api('v1').links().list(
+            filters=[['link_class', '=', 'docker_image_hash'],
+                     ['name', '=', image_hash]]).execute()['items']
+        if existing_links:
+            message = [
+                "arv-keepdocker: Image {} already stored in collection(s):".
+                format(image_hash)]
+            message.extend(link['head_uuid'] for link in existing_links)
+            print >>sys.stderr, "\n".join(message)
+            sys.exit(1)
+
+    # Open a file for the saved image, and write it if needed.
+    outfile_name = '{}.tar'.format(image_hash)
+    image_file, need_save = prep_image_file(outfile_name)
+    if need_save:
+        save_image(image_hash, image_file)
+
+    # Call arv-put with switches we inherited from it
+    # (a.k.a., switches that aren't our own).
+    put_args = opt_parser.parse_known_args(arguments)[1]
+    coll_uuid = arv_put.main(
+        put_args + ['--filename', outfile_name, image_file.name]).strip()
+
+    # Read the image metadata and make Arvados links from it.
+    image_file.seek(0)
+    image_tar = tarfile.open(fileobj=image_file)
+    json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
+    image_metadata = json.load(json_file)
+    json_file.close()
+    image_tar.close()
+    link_base = {'head_uuid': coll_uuid, 'properties': {}}
+    if 'created' in image_metadata:
+        link_base['properties']['image_timestamp'] = image_metadata['created']
+
+    make_link('docker_image_hash', image_hash, **link_base)
+    if not image_hash.startswith(args.image.lower()):
+        make_link('docker_image_repository', args.image, **link_base)
+        make_link('docker_image_tag', args.tag, **link_base)
+
+    # Clean up.
+    image_file.close()
+    for filename in [stat_cache_name(image_file), image_file.name]:
+        try:
+            os.unlink(filename)
+        except OSError as error:
+            if error.errno != errno.ENOENT:
+                raise
+
+if __name__ == '__main__':
+    main()
diff --git a/sdk/python/bin/arv-keepdocker b/sdk/python/bin/arv-keepdocker
new file mode 100755
index 0000000..20d9d62
--- /dev/null
+++ b/sdk/python/bin/arv-keepdocker
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+from arvados.commands.keepdocker import main
+main()
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index ec89977..a209863 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -11,9 +11,10 @@ setup(name='arvados-python-client',
       packages=find_packages(),
       scripts=[
         'bin/arv-get',
-        'bin/arv-put',
+        'bin/arv-keepdocker',
         'bin/arv-ls',
         'bin/arv-normalize',
+        'bin/arv-put',
         ],
       install_requires=[
         'python-gflags',

commit af6ef4aefe653ac5fd7bbd028af580ddfeb8f4d9
Author: Brett Smith <brett at curoverse.com>
Date:   Tue Jun 10 11:09:42 2014 -0400

    2879: arv-put main() returns what it prints.
    
    This will enable other tools to reuse the results.
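
A sketch of the reuse this enables (the file path is hypothetical):

    import arvados.commands.put as arv_put
    # main() still prints its result, but now also returns the same text,
    # so a caller can capture the new collection's locator directly.
    coll_uuid = arv_put.main(['--no-progress', '/tmp/example.tar']).strip()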

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index e4e1b6d..ef34e07 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -328,7 +328,7 @@ def progress_writer(progress_func, outfile=sys.stderr):
 def exit_signal_handler(sigcode, frame):
     sys.exit(-sigcode)
 
-def main(arguments=None):
+def main(arguments=None, output_to=sys.stdout):
     args = parse_arguments(arguments)
 
     if args.progress:
@@ -346,7 +346,8 @@ def main(arguments=None):
     except (IOError, OSError):
         pass  # Couldn't open cache directory/file.  Continue without it.
     except ResumeCacheConflict:
-        print "arv-put: Another process is already uploading this data."
+        output_to.write(
+            "arv-put: Another process is already uploading this data.\n")
         sys.exit(1)
 
     if resume_cache is None:
@@ -382,9 +383,9 @@ def main(arguments=None):
         print >>sys.stderr
 
     if args.stream:
-        print writer.manifest_text(),
+        output = writer.manifest_text()
     elif args.raw:
-        print ','.join(writer.data_locators())
+        output = ','.join(writer.data_locators())
     else:
         # Register the resulting collection in Arvados.
         collection = arvados.api().collections().create(
@@ -395,7 +396,11 @@ def main(arguments=None):
             ).execute()
 
         # Print the locator (uuid) of the new collection.
-        print collection['uuid']
+        output = collection['uuid']
+
+    output_to.write(output)
+    if not output.endswith('\n'):
+        output_to.write('\n')
 
     for sigcode, orig_handler in orig_signal_handlers.items():
         signal.signal(sigcode, orig_handler)
@@ -403,5 +408,7 @@ def main(arguments=None):
     if resume_cache is not None:
         resume_cache.destroy()
 
+    return output
+
 if __name__ == '__main__':
     main()
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index b7c6ed6..4687b4e 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -12,6 +12,8 @@ import time
 import unittest
 import yaml
 
+from cStringIO import StringIO
+
 import arvados
 import arvados.commands.put as arv_put
 
@@ -323,9 +325,10 @@ class ArvadosPutReportTest(ArvadosBaseTestCase):
 
 class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
     def call_main_on_test_file(self):
+        self.main_output = StringIO()
         with self.make_test_file() as testfile:
             path = testfile.name
-            arv_put.main(['--stream', '--no-progress', path])
+            arv_put.main(['--stream', '--no-progress', path], self.main_output)
         self.assertTrue(
             os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
                                         '098f6bcd4621d373cade4e832627b4f6')),

commit e0b4f004501f54a1c3824ae0974f2b4619800eed
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Jun 9 12:02:01 2014 -0400

    2879: Move Python is_hex function to arvados.util.
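
Example calls, following the docstring added below (the inputs are illustrative):

    import arvados.util

    arvados.util.is_hex('deadbeef')        # True: hex digits, any length
    arvados.util.is_hex('deadbeef', 8)     # True: exactly 8 digits
    arvados.util.is_hex('deadbeef', 1, 4)  # False: length outside 1..4
    arvados.util.is_hex('wxyz')            # False: not hexadecimal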

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 4c2d474..82c04ea 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -25,10 +25,10 @@ global_client_object = None
 from api import *
 import config
 import arvados.errors
+import arvados.util
 
 class KeepLocator(object):
     EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
-    HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
 
     def __init__(self, locator_str):
         self.size = None
@@ -53,13 +53,6 @@ class KeepLocator(object):
                              self.permission_hint()]
             if s is not None)
 
-    def _is_hex_length(self, s, *size_spec):
-        if len(size_spec) == 1:
-            good_len = (len(s) == size_spec[0])
-        else:
-            good_len = (size_spec[0] <= len(s) <= size_spec[1])
-        return good_len and self.HEX_RE.match(s)
-
     def _make_hex_prop(name, length):
         # Build and return a new property with the given name that
         # must be a hex string of the given length.
@@ -67,7 +60,7 @@ class KeepLocator(object):
         def getter(self):
             return getattr(self, data_name)
         def setter(self, hex_str):
-            if not self._is_hex_length(hex_str, length):
+            if not arvados.util.is_hex(hex_str, length):
                 raise ValueError("{} must be a {}-digit hex string: {}".
                                  format(name, length, hex_str))
             setattr(self, data_name, hex_str)
@@ -82,7 +75,7 @@ class KeepLocator(object):
 
     @perm_expiry.setter
     def perm_expiry(self, value):
-        if not self._is_hex_length(value, 1, 8):
+        if not arvados.util.is_hex(value, 1, 8):
             raise ValueError(
                 "permission timestamp must be a hex Unix timestamp: {}".
                 format(value))
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 7148b92..e063f12 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -7,6 +7,8 @@ import errno
 import sys
 from arvados.collection import *
 
+HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+
 def clear_tmpdir(path=None):
     """
     Ensure the given directory (or TASK_TMPDIR if none given)
@@ -306,3 +308,25 @@ def listdir_recursive(dirname, base=None):
         else:
             allfiles += [ent_base]
     return allfiles
+
+def is_hex(s, *length_args):
+    """is_hex(s[, length[, max_length]]) -> boolean
+
+    Return True if s is a string of hexadecimal digits.
+    If one length argument is given, the string must contain exactly
+    that number of digits.
+    If two length arguments are given, the string must contain a number of
+    digits between those two lengths, inclusive.
+    Return False otherwise.
+    """
+    num_length_args = len(length_args)
+    if num_length_args > 2:
+        raise ArgumentError("is_hex accepts up to 3 arguments ({} given)".
+                            format(1 + num_length_args))
+    elif num_length_args == 2:
+        good_len = (length_args[0] <= len(s) <= length_args[1])
+    elif num_length_args == 1:
+        good_len = (len(s) == length_args[0])
+    else:
+        good_len = True
+    return bool(good_len and HEX_RE.match(s))

commit c7f445954df35959174761ba7b1f44ecf377c87a
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Jun 9 11:13:39 2014 -0400

    2879: Split arv-put's ArgumentParser into module segments.
    
    This will let us use different pieces as a parent parser for other
    tools, which I plan to do for the Docker image uploader.
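
A sketch of the intended reuse (the same pattern appears in keepdocker.py above; the description string is illustrative):

    import argparse
    import arvados.commands.put as arv_put

    # Another tool can inherit arv-put's runtime switches
    # (--progress/--no-progress/--batch-progress, --resume/--no-resume).
    new_parser = argparse.ArgumentParser(
        description="Example tool built on arv-put's option groups",
        parents=[arv_put.run_opts])
    args = new_parser.parse_args(['--no-progress'])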

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index b9135b9..e4e1b6d 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -19,118 +19,122 @@ import arvados.commands._util as arv_cmd
 
 CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
 
-def parse_arguments(arguments):
-    parser = argparse.ArgumentParser(
-        description='Copy data from the local filesystem to Keep.')
-
-    parser.add_argument('paths', metavar='path', type=str, nargs='*',
-                        help="""
-    Local file or directory. Default: read from standard input.
-    """)
-
-    parser.add_argument('--max-manifest-depth', type=int, metavar='N',
-                        default=-1, help="""
-    Maximum depth of directory tree to represent in the manifest
-    structure. A directory structure deeper than this will be represented
-    as a single stream in the manifest. If N=0, the manifest will contain
-    a single stream. Default: -1 (unlimited), i.e., exactly one manifest
-    stream per filesystem directory that contains files.
-    """)
-
-    group = parser.add_mutually_exclusive_group()
-
-    group.add_argument('--as-stream', action='store_true', dest='stream',
-                       help="""
-    Synonym for --stream.
-    """)
-
-    group.add_argument('--stream', action='store_true',
-                       help="""
-    Store the file content and display the resulting manifest on
-    stdout. Do not write the manifest to Keep or save a Collection object
-    in Arvados.
-    """)
-
-    group.add_argument('--as-manifest', action='store_true', dest='manifest',
-                       help="""
-    Synonym for --manifest.
-    """)
-
-    group.add_argument('--in-manifest', action='store_true', dest='manifest',
-                       help="""
-    Synonym for --manifest.
-    """)
-
-    group.add_argument('--manifest', action='store_true',
-                       help="""
-    Store the file data and resulting manifest in Keep, save a Collection
-    object in Arvados, and display the manifest locator (Collection uuid)
-    on stdout. This is the default behavior.
-    """)
-
-    group.add_argument('--as-raw', action='store_true', dest='raw',
-                       help="""
-    Synonym for --raw.
-    """)
-
-    group.add_argument('--raw', action='store_true',
-                       help="""
-    Store the file content and display the data block locators on stdout,
-    separated by commas, with a trailing newline. Do not store a
-    manifest.
-    """)
-
-    parser.add_argument('--use-filename', type=str, default=None,
-                        dest='filename', help="""
-    Synonym for --filename.
-    """)
-
-    parser.add_argument('--filename', type=str, default=None,
-                        help="""
-    Use the given filename in the manifest, instead of the name of the
-    local file. This is useful when "-" or "/dev/stdin" is given as an
-    input file. It can be used only if there is exactly one path given and
-    it is not a directory. Implies --manifest.
-    """)
-
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument('--progress', action='store_true',
-                       help="""
-    Display human-readable progress on stderr (bytes and, if possible,
-    percentage of total data size). This is the default behavior when
-    stderr is a tty.
-    """)
+upload_opts = argparse.ArgumentParser(add_help=False)
+
+upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
+                    help="""
+Local file or directory. Default: read from standard input.
+""")
+
+upload_opts.add_argument('--max-manifest-depth', type=int, metavar='N',
+                    default=-1, help="""
+Maximum depth of directory tree to represent in the manifest
+structure. A directory structure deeper than this will be represented
+as a single stream in the manifest. If N=0, the manifest will contain
+a single stream. Default: -1 (unlimited), i.e., exactly one manifest
+stream per filesystem directory that contains files.
+""")
+
+_group = upload_opts.add_mutually_exclusive_group()
+
+_group.add_argument('--as-stream', action='store_true', dest='stream',
+                   help="""
+Synonym for --stream.
+""")
+
+_group.add_argument('--stream', action='store_true',
+                   help="""
+Store the file content and display the resulting manifest on
+stdout. Do not write the manifest to Keep or save a Collection object
+in Arvados.
+""")
+
+_group.add_argument('--as-manifest', action='store_true', dest='manifest',
+                   help="""
+Synonym for --manifest.
+""")
+
+_group.add_argument('--in-manifest', action='store_true', dest='manifest',
+                   help="""
+Synonym for --manifest.
+""")
+
+_group.add_argument('--manifest', action='store_true',
+                   help="""
+Store the file data and resulting manifest in Keep, save a Collection
+object in Arvados, and display the manifest locator (Collection uuid)
+on stdout. This is the default behavior.
+""")
+
+_group.add_argument('--as-raw', action='store_true', dest='raw',
+                   help="""
+Synonym for --raw.
+""")
+
+_group.add_argument('--raw', action='store_true',
+                   help="""
+Store the file content and display the data block locators on stdout,
+separated by commas, with a trailing newline. Do not store a
+manifest.
+""")
+
+upload_opts.add_argument('--use-filename', type=str, default=None,
+                    dest='filename', help="""
+Synonym for --filename.
+""")
+
+upload_opts.add_argument('--filename', type=str, default=None,
+                    help="""
+Use the given filename in the manifest, instead of the name of the
+local file. This is useful when "-" or "/dev/stdin" is given as an
+input file. It can be used only if there is exactly one path given and
+it is not a directory. Implies --manifest.
+""")
+
+run_opts = argparse.ArgumentParser(add_help=False)
+_group = run_opts.add_mutually_exclusive_group()
+_group.add_argument('--progress', action='store_true',
+                   help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when
+stderr is a tty.
+""")
+
+_group.add_argument('--no-progress', action='store_true',
+                   help="""
+Do not display human-readable progress on stderr, even if stderr is a
+tty.
+""")
+
+_group.add_argument('--batch-progress', action='store_true',
+                   help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+
+_group = run_opts.add_mutually_exclusive_group()
+_group.add_argument('--resume', action='store_true', default=True,
+                   help="""
+Continue interrupted uploads from cached state (default).
+""")
+_group.add_argument('--no-resume', action='store_false', dest='resume',
+                   help="""
+Do not continue interrupted uploads from cached state.
+""")
+
+arg_parser = argparse.ArgumentParser(
+    description='Copy data from the local filesystem to Keep.',
+    parents=[upload_opts, run_opts])
 
-    group.add_argument('--no-progress', action='store_true',
-                       help="""
-    Do not display human-readable progress on stderr, even if stderr is a
-    tty.
-    """)
-
-    group.add_argument('--batch-progress', action='store_true',
-                       help="""
-    Display machine-readable progress on stderr (bytes and, if known,
-    total data size).
-    """)
-
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument('--resume', action='store_true', default=True,
-                       help="""
-    Continue interrupted uploads from cached state (default).
-    """)
-    group.add_argument('--no-resume', action='store_false', dest='resume',
-                       help="""
-    Do not continue interrupted uploads from cached state.
-    """)
-
-    args = parser.parse_args(arguments)
+def parse_arguments(arguments):
+    args = arg_parser.parse_args(arguments)
 
     if len(args.paths) == 0:
         args.paths += ['/dev/stdin']
 
     if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
         if args.filename:
-            parser.error("""
+            arg_parser.error("""
     --filename argument cannot be used when storing a directory or
     multiple files.
     """)

commit 75483667d22a4c40ab238a13cb42d565304e05c5
Author: Brett Smith <brett at curoverse.com>
Date:   Mon Jun 9 11:06:20 2014 -0400

    2879: Factor out arv-put's cache creation to a utility library.
    
    This function will be useful for the Docker image uploader, and
    probably other tools as well.
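
A minimal sketch of the new helper in use (the subdirectory name is hypothetical):

    import arvados.commands._util as arv_cmd
    # Creates ~/.cache/arvados/example (parent directories included) with mode
    # 0700 when possible; with the default errors='ignore' it returns None on
    # failure instead of raising.
    cache_dir = arv_cmd.make_home_conf_dir('.cache/arvados/example', 0o700)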

diff --git a/sdk/python/arvados/commands/_util.py b/sdk/python/arvados/commands/_util.py
new file mode 100644
index 0000000..f7cb80d
--- /dev/null
+++ b/sdk/python/arvados/commands/_util.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+import errno
+import os
+
+def _ignore_error(error):
+    return None
+
+def _raise_error(error):
+    raise error
+
+def make_home_conf_dir(path, mode=None, errors='ignore'):
+    # Make the directory path under the user's home directory, making parent
+    # directories as needed.
+    # If the directory is newly created, and a mode is specified, chmod it
+    # with those permissions.
+    # If there's an error, return None if errors is 'ignore', else raise an
+    # exception.
+    error_handler = _ignore_error if (errors == 'ignore') else _raise_error
+    tilde_path = os.path.join('~', path)
+    abs_path = os.path.expanduser(tilde_path)
+    if abs_path == tilde_path:
+        return error_handler(ValueError("no home directory available"))
+    try:
+        os.makedirs(abs_path)
+    except OSError as error:
+        if error.errno != errno.EEXIST:
+            return error_handler(error)
+    else:
+        if mode is not None:
+            os.chmod(abs_path, mode)
+    return abs_path
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 01bae2f..b9135b9 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -15,6 +15,8 @@ import signal
 import sys
 import tempfile
 
+import arvados.commands._util as arv_cmd
+
 CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
 
 def parse_arguments(arguments):
@@ -150,17 +152,11 @@ class ResumeCacheConflict(Exception):
 
 
 class ResumeCache(object):
-    CACHE_DIR = os.path.expanduser('~/.cache/arvados/arv-put')
+    CACHE_DIR = '.cache/arvados/arv-put'
 
     @classmethod
     def setup_user_cache(cls):
-        try:
-            os.makedirs(cls.CACHE_DIR)
-        except OSError as error:
-            if error.errno != errno.EEXIST:
-                raise
-        else:
-            os.chmod(cls.CACHE_DIR, 0o700)
+        return arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700)
 
     def __init__(self, file_spec):
         self.cache_file = open(file_spec, 'a+')
@@ -339,16 +335,18 @@ def main(arguments=None):
         reporter = None
     bytes_expected = expected_bytes_for(args.paths)
 
+    resume_cache = None
     try:
-        ResumeCache.setup_user_cache()
-        resume_cache = ResumeCache(ResumeCache.make_path(args))
+        if ResumeCache.setup_user_cache() is not None:
+            resume_cache = ResumeCache(ResumeCache.make_path(args))
     except (IOError, OSError):
-        # Couldn't open cache directory/file.  Continue without it.
-        resume_cache = None
-        writer = ArvPutCollectionWriter(resume_cache, reporter, bytes_expected)
+        pass  # Couldn't open cache directory/file.  Continue without it.
     except ResumeCacheConflict:
         print "arv-put: Another process is already uploading this data."
         sys.exit(1)
+
+    if resume_cache is None:
+        writer = ArvPutCollectionWriter(resume_cache, reporter, bytes_expected)
     else:
         if not args.resume:
             resume_cache.restart()

-----------------------------------------------------------------------


hooks/post-receive
-- 



