[ARVADOS] created: 743f59e50b1a022b8933e12ca2e3ab17a9b6a0ee
git at public.curoverse.com
git at public.curoverse.com
Thu Apr 9 10:13:33 EDT 2015
at 743f59e50b1a022b8933e12ca2e3ab17a9b6a0ee (commit)
commit 743f59e50b1a022b8933e12ca2e3ab17a9b6a0ee
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Apr 9 10:15:17 2015 -0400
4223: Add --download option to arv-keepdocker to fetch a Docker image from Keep
and load it locally. Refactor keepdocker to enable SDK users to better access
upload/download functionality. Fix API requests to use list_all() so that all
results are returned.
diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 933fd77..59e0c69 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -5,6 +5,7 @@ import datetime
import errno
import json
import os
+import re
import subprocess
import sys
import tarfile
@@ -15,8 +16,10 @@ from collections import namedtuple
from stat import *
import arvados
+from arvados.util import list_all
import arvados.commands._util as arv_cmd
import arvados.commands.put as arv_put
+import arvados.errors
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
@@ -28,6 +31,10 @@ keepdocker_parser.add_argument(
'-f', '--force', action='store_true', default=False,
help="Re-upload the image even if it already exists on the server")
+keepdocker_parser.add_argument(
+ '--no-trunc', action='store_true', default=False,
+ help="Don't truncate Docker image hashes in output.")
+
_group = keepdocker_parser.add_mutually_exclusive_group()
_group.add_argument(
'--pull', action='store_true', default=False,
@@ -36,9 +43,17 @@ _group.add_argument(
'--no-pull', action='store_false', dest='pull',
help="Use locally installed image only, don't pull image from Docker registry (default)")
+_group = keepdocker_parser.add_mutually_exclusive_group()
+_group.add_argument(
+ '--download', action='store_true', default=False,
+ help="Fetch Docker image from Arvados and load locally.")
+_group.add_argument(
+ '--upload', action='store_true', default=False,
+ help="Upload local Docker image to Arvados (default)")
+
keepdocker_parser.add_argument(
'image', nargs='?',
- help="Docker image to upload, as a repository name or hash")
+ help="Docker image as a repository name or hash")
keepdocker_parser.add_argument(
'tag', nargs='?', default='latest',
help="Tag of the Docker image to upload (default 'latest')")
@@ -47,7 +62,7 @@ keepdocker_parser.add_argument(
# The options inherited from arv-put include --name, --project-uuid,
# --progress/--no-progress/--batch-progress and --resume/--no-resume.
arg_parser = argparse.ArgumentParser(
- description="Upload or list Docker images in Arvados",
+ description="Upload, download or list Docker images in Arvados",
parents=[keepdocker_parser, arv_put.run_opts, arv_cmd.retry_opt])
class DockerError(Exception):
@@ -165,7 +180,7 @@ def ptimestamp(t):
t = s[0] + s[1][-1:]
return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
-def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
+def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None, image_hash=None, image_collection=None):
"""List all Docker images known to the api_client with image_name and
image_tag. If no image_name is given, defaults to listing all
Docker images.
@@ -181,10 +196,17 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
if image_name:
image_link_name = "{}:{}".format(image_name, image_tag or 'latest')
docker_image_filters.append(['name', '=', image_link_name])
+ elif image_hash:
+ docker_image_filters.append(['name', '=', image_hash])
+ elif image_collection:
+ docker_image_filters.append(['head_uuid', '=', image_collection])
- existing_links = api_client.links().list(
- filters=docker_image_filters
- ).execute(num_retries=num_retries)['items']
+ existing_links = list_all(api_client.links().list, num_retries, filters=docker_image_filters)
+
+ if image_name or image_hash:
+ existing_links = list_all(api_client.links().list, num_retries,
+ filters=[['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']],
+ ['head_uuid', 'in', [u['head_uuid'] for u in existing_links]]])
images = {}
for link in existing_links:
collection_uuid = link["head_uuid"]
@@ -210,22 +232,44 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
return sorted(images.items(), lambda a, b: cmp(b[1]["timestamp"], a[1]["timestamp"]))
+def image_hash_in_collection(cr):
+ if len(cr) != 1:
+ raise arvados.errors.ArgumentError("docker_image_locator must only contain a single file")
-def main(arguments=None):
- args = arg_parser.parse_args(arguments)
- api = arvados.api('v1')
-
- if args.image is None or args.image == 'images':
- fmt = "{:30} {:10} {:12} {:29} {:20}"
- print fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED")
- for i, j in list_images_in_arv(api, args.retries):
- print(fmt.format(j["repo"], j["tag"], j["dockerhash"][0:12], i, j["timestamp"].strftime("%c")))
- sys.exit(0)
+ docker_image = re.match("([0-9a-f]{64})\.tar", cr.keys()[0])
+ if docker_image:
+ return docker_image.group(1)
+ else:
+ return None
+
+def load_image_from_collection(api_client, docker_image_locator):
+ cr = arvados.CollectionReader(docker_image_locator, api_client=api_client)
+ docker_image = image_hash_in_collection(cr)
+ if docker_image:
+ for d in docker_images():
+ if d.hash == docker_image:
+ print "Docker image '%s' is already loaded" % docker_image
+ return docker_image
+
+ with cr.open(docker_image+".tar") as img:
+ docker_load = subprocess.Popen(["docker", "load"], stdin=subprocess.PIPE)
+ data = img.read(64000)
+ n = len(data)
+ while data:
+ docker_load.stdin.write(data)
+ data = img.read(1024*1024)
+ n += len(data)
+ docker_load.stdin.close()
+ docker_load.wait()
+ if docker_load.returncode != 0:
+ raise arvados.errors.CommandFailedError("Failed to load image")
+
+ return docker_image
+ else:
+ raise arvados.errors.ArgumentError("Failed to find Docker image in collection %s" % docker_image_locator)
- # Pull the image if requested, unless the image is specified as a hash
- # that we already have.
- if args.pull and not find_image_hashes(args.image):
- pull_image(args.image, args.tag)
+def upload_image(api, arguments):
+ args = arg_parser.parse_args(arguments)
try:
image_hash = find_one_image_hash(args.image, args.tag)
@@ -254,24 +298,21 @@ def main(arguments=None):
num_retries=args.retries)['uuid']
# Find image hash tags
- existing_links = api.links().list(
- filters=[['link_class', '=', 'docker_image_hash'],
- ['name', '=', image_hash]]
- ).execute(num_retries=args.retries)['items']
+ existing_links = list_all(api.links().list, num_retries=args.retries,
+ filters=[['link_class', '=', 'docker_image_hash'],
+ ['name', '=', image_hash]])
if existing_links:
# get readable collections
- collections = api.collections().list(
- filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
- select=["uuid", "owner_uuid", "name", "manifest_text"]
- ).execute(num_retries=args.retries)['items']
+ collections = list_all(api.collections().list, num_retries=args.retries,
+ filters=[['uuid', 'in', [link['head_uuid'] for link in existing_links]]],
+ select=["uuid", "owner_uuid", "name", "manifest_text"])
if collections:
# check for repo+tag links on these collections
- existing_repo_tag = (api.links().list(
+ existing_repo_tag = list_all(api.links().list, num_retries=args.retries,
filters=[['link_class', '=', 'docker_image_repo+tag'],
['name', '=', image_repo_tag],
- ['head_uuid', 'in', collections]]
- ).execute(num_retries=args.retries)['items']) if image_repo_tag else []
+ ['head_uuid', 'in', collections]]) if image_repo_tag else []
# Filter on elements owned by the parent project
owned_col = [c for c in collections if c['owner_uuid'] == parent_project_uuid]
@@ -350,5 +391,58 @@ def main(arguments=None):
if error.errno != errno.ENOENT:
raise
+
+def main(arguments=None):
+ args = arg_parser.parse_args(arguments)
+ api = arvados.api('v1')
+
+ if args.image is None or args.image == 'images':
+ if args.no_trunc:
+ fmt = "{:30} {:10} {:64} {:29} {:20}"
+ else:
+ fmt = "{:30} {:10} {:12} {:29} {:20}"
+ print fmt.format("REPOSITORY", "TAG", "IMAGE ID", "COLLECTION", "CREATED")
+ for i, j in list_images_in_arv(api, args.retries):
+ print(fmt.format(j["repo"], j["tag"],
+ j["dockerhash"] if args.no_trunc else j["dockerhash"][0:12],
+ i, j["timestamp"].strftime("%c")))
+ sys.exit(0)
+
+ if args.download:
+ # search by name and tag
+ imgs_in_arv = list_images_in_arv(api, args.retries, image_name=args.image)
+ do_tag = True
+
+ if not imgs_in_arv:
+ # search by image hash
+ imgs_in_arv = list_images_in_arv(api, args.retries, image_hash=args.image)
+ do_tag = False
+
+ if not imgs_in_arv and arvados.util.collection_uuid_pattern.match(args.image):
+ # search by collection uuid
+ imgs_in_arv = list_images_in_arv(api, args.retries, image_collection=args.image)
+ do_tag = True
+
+ if not imgs_in_arv and arvados.util.keep_locator_pattern.match(args.image):
+ # search by manifest portable data hash
+ imgs_in_arv = [[args.image]]
+ do_tag = False
+
+ if imgs_in_arv:
+ imghash = load_image_from_collection(api, imgs_in_arv[0][0])
+ if do_tag:
+ popen_docker(["tag", imghash, args.image], stdin=None, stdout=None).wait()
+ sys.exit(0)
+ else:
+ print >>sys.stderr, "arv-keepdocker: Docker image '%s' not found in Arvados" % args.image
+ sys.exit(1)
+
+ # Pull the image if requested, unless the image is specified as a hash
+ # that we already have.
+ if args.pull and not find_image_hashes(args.image):
+ pull_image(args.image, args.tag)
+
+ upload_image(api, arguments)
+
if __name__ == '__main__':
main()
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list