[ARVADOS] updated: c820bfc91be7635739bad0857ba3a385d1334b6a
Git user
git at public.curoverse.com
Fri Mar 31 12:42:17 EDT 2017
Summary of changes:
sdk/python/arvados/commands/get.py | 256 +++++++++++++++++++++++++++++++++++++
sdk/python/arvados/commands/ls.py | 15 ++-
sdk/python/bin/arv-get | 235 +---------------------------------
sdk/python/tests/test_arv_ls.py | 11 +-
4 files changed, 276 insertions(+), 241 deletions(-)
create mode 100755 sdk/python/arvados/commands/get.py
via c820bfc91be7635739bad0857ba3a385d1334b6a (commit)
via fb1c8e81a200c11b1130f3a9af586f1bbf8c19b3 (commit)
via 0568c2d42703a7b839f2661968c05a23753f67c3 (commit)
via 154ae0d4b13329bae1033ab99095c00d0b0f66e2 (commit)
via ae7f5a9c869927336dc81cf0552b955457a09647 (commit)
from 4afa501317cc04596377bcff8a7b2fccf7ab8d8d (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit c820bfc91be7635739bad0857ba3a385d1334b6a
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Fri Mar 31 13:40:23 2017 -0300
7824: Use logging facility to show error messages.
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
index d762bbd..67f38c4 100755
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -14,10 +14,7 @@ import arvados.commands._util as arv_cmd
from arvados._version import __version__
api_client = None
-
-def abort(msg, code=1):
- print >>sys.stderr, "arv-get:", msg
- exit(code)
+logger = logging.getLogger('arvados.arv-get')
parser = argparse.ArgumentParser(
description='Copy data from Keep to a local file or pipe.',
@@ -88,8 +85,8 @@ overwritten. This option causes even devices, sockets, and fifos to be
skipped.
""")
-def parse_arguments(arguments, logger):
- args = parser.parse_args()
+def parse_arguments(arguments, stdout, stderr):
+ args = parser.parse_args(arguments)
if args.locator[-1] == os.sep:
args.r = True
@@ -120,17 +117,16 @@ def parse_arguments(arguments, logger):
# either going to a named file, or going (via stdout) to something
# that isn't a tty.
if (not (args.batch_progress or args.no_progress)
- and sys.stderr.isatty()
+ and stderr.isatty()
and (args.destination != '-'
- or not sys.stdout.isatty())):
+ or not stdout.isatty())):
args.progress = True
return args
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
global api_client
- logger = logging.getLogger('arvados.arv-get')
- args = parse_arguments(arguments, logger)
+ args = parse_arguments(arguments, stdout, stderr)
if api_client is None:
api_client = arvados.api('v1')
@@ -148,16 +144,18 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
open_flags |= os.O_EXCL
try:
if args.destination == "-":
- sys.stdout.write(reader.manifest_text())
+ stdout.write(reader.manifest_text())
else:
out_fd = os.open(args.destination, open_flags)
with os.fdopen(out_fd, 'wb') as out_file:
out_file.write(reader.manifest_text())
except (IOError, OSError) as error:
- abort("can't write to '{}': {}".format(args.destination, error))
+ logger.error("can't write to '{}': {}".format(args.destination, error))
+ return 1
except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
- abort("failed to download '{}': {}".format(collection, error))
- sys.exit(0)
+ logger.error("failed to download '{}': {}".format(collection, error))
+ return 1
+ return 0
# Scan the collection. Make an array of (stream, file, local
# destination filename) tuples, and add up total size to extract.
@@ -177,7 +175,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
if (not (args.n or args.f or args.skip_existing) and
os.path.exists(dest_path)):
- abort('Local file %s already exists.' % (dest_path,))
+ logger.error('Local file %s already exists.' % (dest_path,))
+ return 1
else:
if os.path.join(s.stream_name(), f.name) != '.' + get_prefix:
continue
@@ -185,7 +184,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
todo += [(s, f, dest_path)]
todo_bytes += f.size()
except arvados.errors.NotFoundError as e:
- abort(e)
+ logger.error(e)
+ return 1
out_bytes = 0
for s, f, outfilename in todo:
@@ -193,7 +193,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
digestor = None
if not args.n:
if outfilename == "-":
- outfile = sys.stdout
+ outfile = stdout
else:
if args.skip_existing and os.path.exists(outfilename):
logger.debug('Local file %s exists. Skipping.', outfilename)
@@ -202,13 +202,15 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
os.path.isdir(outfilename)):
# Good thing we looked again: apparently this file wasn't
# here yet when we checked earlier.
- abort('Local file %s already exists.' % (outfilename,))
+ logger.error('Local file %s already exists.' % (outfilename,))
+ return 1
if args.r:
arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
try:
outfile = open(outfilename, 'wb')
except Exception as error:
- abort('Open(%s) failed: %s' % (outfilename, error))
+ logger.error('Open(%s) failed: %s' % (outfilename, error))
+ return 1
if args.hash:
digestor = hashlib.new(args.hash)
try:
@@ -220,26 +222,26 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
digestor.update(data)
out_bytes += len(data)
if args.progress:
- sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
- (out_bytes >> 20,
- todo_bytes >> 20,
- (100
- if todo_bytes==0
- else 100.0*out_bytes/todo_bytes)))
+ stderr.write('\r%d MiB / %d MiB %.1f%%' %
+ (out_bytes >> 20,
+ todo_bytes >> 20,
+ (100
+ if todo_bytes==0
+ else 100.0*out_bytes/todo_bytes)))
elif args.batch_progress:
- sys.stderr.write('%s %d read %d total\n' %
- (sys.argv[0], os.getpid(),
- out_bytes, todo_bytes))
+ stderr.write('%s %d read %d total\n' %
+ (sys.argv[0], os.getpid(),
+ out_bytes, todo_bytes))
if digestor:
- sys.stderr.write("%s %s/%s\n"
- % (digestor.hexdigest(), s.stream_name(), f.name))
+ stderr.write("%s %s/%s\n"
+ % (digestor.hexdigest(), s.stream_name(), f.name))
except KeyboardInterrupt:
if outfile and (outfile.fileno() > 2) and not outfile.closed:
os.unlink(outfile.name)
break
if args.progress:
- sys.stderr.write('\n')
+ stderr.write('\n')
def files_in_collection(c):
# Sort first by file type, then alphabetically by file path.
diff --git a/sdk/python/arvados/commands/ls.py b/sdk/python/arvados/commands/ls.py
index cff7b55..918ce5e 100755
--- a/sdk/python/arvados/commands/ls.py
+++ b/sdk/python/arvados/commands/ls.py
@@ -4,6 +4,7 @@ from __future__ import print_function
import argparse
import collections
+import logging
import sys
import arvados
@@ -34,19 +35,21 @@ def size_formatter(coll_file):
def name_formatter(coll_file):
return "{}/{}".format(coll_file.stream_name, coll_file.name)
-def main(args, stdout, stderr, api_client=None):
+def main(args, stdout, stderr, api_client=None, logger=None):
args = parse_args(args)
if api_client is None:
api_client = arvados.api('v1')
+ if logger is None:
+ logger = logging.getLogger('arvados.arv-ls')
+
try:
cr = arvados.CollectionReader(args.locator, api_client=api_client,
num_retries=args.retries)
except (arvados.errors.ArgumentError,
arvados.errors.NotFoundError) as error:
- print("arv-ls: error fetching collection: {}".format(error),
- file=stderr)
+ logger.error("error fetching collection: {}".format(error))
return 1
formatters = []
diff --git a/sdk/python/tests/test_arv_ls.py b/sdk/python/tests/test_arv_ls.py
index 5064f07..99b5510 100644
--- a/sdk/python/tests/test_arv_ls.py
+++ b/sdk/python/tests/test_arv_ls.py
@@ -35,10 +35,10 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
api_client.collections().get().execute.return_value = coll_info
return coll_info, api_client
- def run_ls(self, args, api_client):
+ def run_ls(self, args, api_client, logger=None):
self.stdout = io.BytesIO()
self.stderr = io.BytesIO()
- return arv_ls.main(args, self.stdout, self.stderr, api_client)
+ return arv_ls.main(args, self.stdout, self.stderr, api_client, logger)
def test_plain_listing(self):
collection, api_client = self.mock_api_for_manifest(
@@ -76,10 +76,13 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
def test_locator_failure(self):
api_client = mock.MagicMock(name='mock_api_client')
+ error_mock = mock.MagicMock()
+ logger = mock.MagicMock()
+ logger.error = error_mock
api_client.collections().get().execute.side_effect = (
arv_error.NotFoundError)
- self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client))
- self.assertNotEqual('', self.stderr.getvalue())
+ self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client, logger))
+ self.assertEqual(1, error_mock.call_count)
def test_version_argument(self):
err = io.BytesIO()
commit fb1c8e81a200c11b1130f3a9af586f1bbf8c19b3
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Fri Mar 31 11:24:50 2017 -0300
7824: Updated code to use newer Collection APIs, leaving all_streams() and all_files() behind.
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
index b23f2d0..d762bbd 100755
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -159,41 +159,36 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
abort("failed to download '{}': {}".format(collection, error))
sys.exit(0)
- reader.normalize()
-
# Scan the collection. Make an array of (stream, file, local
# destination filename) tuples, and add up total size to extract.
todo = []
todo_bytes = 0
try:
- for s in reader.all_streams():
- for f in s.all_files():
- if get_prefix and get_prefix[-1] == os.sep:
- if 0 != string.find(os.path.join(s.name(), f.name()),
- '.' + get_prefix):
- continue
- if args.destination == "-":
- dest_path = "-"
- else:
- dest_path = os.path.join(
- args.destination,
- os.path.join(s.name(), f.name())[len(get_prefix)+1:])
- if (not (args.n or args.f or args.skip_existing) and
- os.path.exists(dest_path)):
- abort('Local file %s already exists.' % (dest_path,))
+ for s, f in files_in_collection(reader):
+ if get_prefix and get_prefix[-1] == os.sep:
+ if 0 != string.find(os.path.join(s.stream_name(), f.name),
+ '.' + get_prefix):
+ continue
+ if args.destination == "-":
+ dest_path = "-"
else:
- if os.path.join(s.name(), f.name()) != '.' + get_prefix:
- continue
- dest_path = args.destination
- todo += [(s, f, dest_path)]
- todo_bytes += f.size()
+ dest_path = os.path.join(
+ args.destination,
+ os.path.join(s.stream_name(), f.name)[len(get_prefix)+1:])
+ if (not (args.n or args.f or args.skip_existing) and
+ os.path.exists(dest_path)):
+ abort('Local file %s already exists.' % (dest_path,))
+ else:
+ if os.path.join(s.stream_name(), f.name) != '.' + get_prefix:
+ continue
+ dest_path = args.destination
+ todo += [(s, f, dest_path)]
+ todo_bytes += f.size()
except arvados.errors.NotFoundError as e:
abort(e)
- # Read data, and (if not -n) write to local file(s) or pipe.
-
out_bytes = 0
- for s,f,outfilename in todo:
+ for s, f, outfilename in todo:
outfile = None
digestor = None
if not args.n:
@@ -217,26 +212,27 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
if args.hash:
digestor = hashlib.new(args.hash)
try:
- for data in f.readall():
- if outfile:
- outfile.write(data)
- if digestor:
- digestor.update(data)
- out_bytes += len(data)
- if args.progress:
- sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
- (out_bytes >> 20,
- todo_bytes >> 20,
- (100
- if todo_bytes==0
- else 100.0*out_bytes/todo_bytes)))
- elif args.batch_progress:
- sys.stderr.write('%s %d read %d total\n' %
- (sys.argv[0], os.getpid(),
- out_bytes, todo_bytes))
+ with s.open(f.name, 'r') as file_reader:
+ for data in file_reader.readall():
+ if outfile:
+ outfile.write(data)
+ if digestor:
+ digestor.update(data)
+ out_bytes += len(data)
+ if args.progress:
+ sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
+ (out_bytes >> 20,
+ todo_bytes >> 20,
+ (100
+ if todo_bytes==0
+ else 100.0*out_bytes/todo_bytes)))
+ elif args.batch_progress:
+ sys.stderr.write('%s %d read %d total\n' %
+ (sys.argv[0], os.getpid(),
+ out_bytes, todo_bytes))
if digestor:
sys.stderr.write("%s %s/%s\n"
- % (digestor.hexdigest(), s.name(), f.name()))
+ % (digestor.hexdigest(), s.stream_name(), f.name))
except KeyboardInterrupt:
if outfile and (outfile.fileno() > 2) and not outfile.closed:
os.unlink(outfile.name)
@@ -244,3 +240,15 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
if args.progress:
sys.stderr.write('\n')
+
+def files_in_collection(c):
+ # Sort first by file type, then alphabetically by file path.
+ for i in sorted(c.keys(),
+ key=lambda k: (
+ isinstance(c[k], arvados.collection.Subcollection),
+ k.upper())):
+ if isinstance(c[i], arvados.arvfile.ArvadosFile):
+ yield (c, c[i])
+ elif isinstance(c[i], arvados.collection.Subcollection):
+ for s, f in files_in_collection(c[i]):
+ yield (s, f)
commit 0568c2d42703a7b839f2661968c05a23753f67c3
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Thu Mar 30 15:40:41 2017 -0300
7824: Moved arv-get code to arvados.commands and replaced bin/arv-get with a stub caller.
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
new file mode 100755
index 0000000..b23f2d0
--- /dev/null
+++ b/sdk/python/arvados/commands/get.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+
+import argparse
+import hashlib
+import os
+import re
+import string
+import sys
+import logging
+
+import arvados
+import arvados.commands._util as arv_cmd
+
+from arvados._version import __version__
+
+api_client = None
+
+def abort(msg, code=1):
+ print >>sys.stderr, "arv-get:", msg
+ exit(code)
+
+parser = argparse.ArgumentParser(
+ description='Copy data from Keep to a local file or pipe.',
+ parents=[arv_cmd.retry_opt])
+parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
+parser.add_argument('locator', type=str,
+ help="""
+Collection locator, optionally with a file path or prefix.
+""")
+parser.add_argument('destination', type=str, nargs='?', default='-',
+ help="""
+Local file or directory where the data is to be written. Default: stdout.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--progress', action='store_true',
+ help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when it
+is not expected to interfere with the output: specifically, stderr is
+a tty _and_ either stdout is not a tty, or output is being written to
+named files rather than stdout.
+""")
+group.add_argument('--no-progress', action='store_true',
+ help="""
+Do not display human-readable progress on stderr.
+""")
+group.add_argument('--batch-progress', action='store_true',
+ help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('--hash',
+ help="""
+Display the hash of each file as it is read from Keep, using the given
+hash algorithm. Supported algorithms include md5, sha1, sha224,
+sha256, sha384, and sha512.
+""")
+group.add_argument('--md5sum', action='store_const',
+ dest='hash', const='md5',
+ help="""
+Display the MD5 hash of each file as it is read from Keep.
+""")
+parser.add_argument('-n', action='store_true',
+ help="""
+Do not write any data -- just read from Keep, and report md5sums if
+requested.
+""")
+parser.add_argument('-r', action='store_true',
+ help="""
+Retrieve all files in the specified collection/prefix. This is the
+default behavior if the "locator" argument ends with a forward slash.
+""")
+group = parser.add_mutually_exclusive_group()
+group.add_argument('-f', action='store_true',
+ help="""
+Overwrite existing files while writing. The default behavior is to
+refuse to write *anything* if any of the output files already
+exist. As a special case, -f is not needed to write to stdout.
+""")
+group.add_argument('--skip-existing', action='store_true',
+ help="""
+Skip files that already exist. The default behavior is to refuse to
+write *anything* if any files exist that would have to be
+overwritten. This option causes even devices, sockets, and fifos to be
+skipped.
+""")
+
+def parse_arguments(arguments, logger):
+ args = parser.parse_args()
+
+ if args.locator[-1] == os.sep:
+ args.r = True
+ if (args.r and
+ not args.n and
+ not (args.destination and
+ os.path.isdir(args.destination))):
+ parser.error('Destination is not a directory.')
+ if not args.r and (os.path.isdir(args.destination) or
+ args.destination[-1] == os.path.sep):
+ args.destination = os.path.join(args.destination,
+ os.path.basename(args.locator))
+ logger.debug("Appended source file name to destination directory: %s",
+ args.destination)
+
+ if args.destination == '/dev/stdout':
+ args.destination = "-"
+
+ if args.destination == '-':
+ # Normally you have to use -f to write to a file (or device) that
+ # already exists, but "-" and "/dev/stdout" are common enough to
+ # merit a special exception.
+ args.f = True
+ else:
+ args.destination = args.destination.rstrip(os.sep)
+
+ # Turn on --progress by default if stderr is a tty and output is
+ # either going to a named file, or going (via stdout) to something
+ # that isn't a tty.
+ if (not (args.batch_progress or args.no_progress)
+ and sys.stderr.isatty()
+ and (args.destination != '-'
+ or not sys.stdout.isatty())):
+ args.progress = True
+ return args
+
+def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
+ global api_client
+
+ logger = logging.getLogger('arvados.arv-get')
+ args = parse_arguments(arguments, logger)
+ if api_client is None:
+ api_client = arvados.api('v1')
+
+ r = re.search(r'^(.*?)(/.*)?$', args.locator)
+ collection = r.group(1)
+ get_prefix = r.group(2)
+ if args.r and not get_prefix:
+ get_prefix = os.sep
+ reader = arvados.CollectionReader(collection, num_retries=args.retries)
+
+ if not get_prefix:
+ if not args.n:
+ open_flags = os.O_CREAT | os.O_WRONLY
+ if not args.f:
+ open_flags |= os.O_EXCL
+ try:
+ if args.destination == "-":
+ sys.stdout.write(reader.manifest_text())
+ else:
+ out_fd = os.open(args.destination, open_flags)
+ with os.fdopen(out_fd, 'wb') as out_file:
+ out_file.write(reader.manifest_text())
+ except (IOError, OSError) as error:
+ abort("can't write to '{}': {}".format(args.destination, error))
+ except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
+ abort("failed to download '{}': {}".format(collection, error))
+ sys.exit(0)
+
+ reader.normalize()
+
+ # Scan the collection. Make an array of (stream, file, local
+ # destination filename) tuples, and add up total size to extract.
+ todo = []
+ todo_bytes = 0
+ try:
+ for s in reader.all_streams():
+ for f in s.all_files():
+ if get_prefix and get_prefix[-1] == os.sep:
+ if 0 != string.find(os.path.join(s.name(), f.name()),
+ '.' + get_prefix):
+ continue
+ if args.destination == "-":
+ dest_path = "-"
+ else:
+ dest_path = os.path.join(
+ args.destination,
+ os.path.join(s.name(), f.name())[len(get_prefix)+1:])
+ if (not (args.n or args.f or args.skip_existing) and
+ os.path.exists(dest_path)):
+ abort('Local file %s already exists.' % (dest_path,))
+ else:
+ if os.path.join(s.name(), f.name()) != '.' + get_prefix:
+ continue
+ dest_path = args.destination
+ todo += [(s, f, dest_path)]
+ todo_bytes += f.size()
+ except arvados.errors.NotFoundError as e:
+ abort(e)
+
+ # Read data, and (if not -n) write to local file(s) or pipe.
+
+ out_bytes = 0
+ for s,f,outfilename in todo:
+ outfile = None
+ digestor = None
+ if not args.n:
+ if outfilename == "-":
+ outfile = sys.stdout
+ else:
+ if args.skip_existing and os.path.exists(outfilename):
+ logger.debug('Local file %s exists. Skipping.', outfilename)
+ continue
+ elif not args.f and (os.path.isfile(outfilename) or
+ os.path.isdir(outfilename)):
+ # Good thing we looked again: apparently this file wasn't
+ # here yet when we checked earlier.
+ abort('Local file %s already exists.' % (outfilename,))
+ if args.r:
+ arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+ try:
+ outfile = open(outfilename, 'wb')
+ except Exception as error:
+ abort('Open(%s) failed: %s' % (outfilename, error))
+ if args.hash:
+ digestor = hashlib.new(args.hash)
+ try:
+ for data in f.readall():
+ if outfile:
+ outfile.write(data)
+ if digestor:
+ digestor.update(data)
+ out_bytes += len(data)
+ if args.progress:
+ sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
+ (out_bytes >> 20,
+ todo_bytes >> 20,
+ (100
+ if todo_bytes==0
+ else 100.0*out_bytes/todo_bytes)))
+ elif args.batch_progress:
+ sys.stderr.write('%s %d read %d total\n' %
+ (sys.argv[0], os.getpid(),
+ out_bytes, todo_bytes))
+ if digestor:
+ sys.stderr.write("%s %s/%s\n"
+ % (digestor.hexdigest(), s.name(), f.name()))
+ except KeyboardInterrupt:
+ if outfile and (outfile.fileno() > 2) and not outfile.closed:
+ os.unlink(outfile.name)
+ break
+
+ if args.progress:
+ sys.stderr.write('\n')
diff --git a/sdk/python/bin/arv-get b/sdk/python/bin/arv-get
index f91b397..1c2e552 100755
--- a/sdk/python/bin/arv-get
+++ b/sdk/python/bin/arv-get
@@ -1,238 +1,7 @@
#!/usr/bin/env python
-import argparse
-import hashlib
-import os
-import re
-import string
import sys
-import logging
-import arvados
-import arvados.commands._util as arv_cmd
+from arvados.commands.get import main
-from arvados._version import __version__
-
-logger = logging.getLogger('arvados.arv-get')
-
-def abort(msg, code=1):
- print >>sys.stderr, "arv-get:", msg
- exit(code)
-
-parser = argparse.ArgumentParser(
- description='Copy data from Keep to a local file or pipe.',
- parents=[arv_cmd.retry_opt])
-parser.add_argument('--version', action='version',
- version="%s %s" % (sys.argv[0], __version__),
- help='Print version and exit.')
-parser.add_argument('locator', type=str,
- help="""
-Collection locator, optionally with a file path or prefix.
-""")
-parser.add_argument('destination', type=str, nargs='?', default='-',
- help="""
-Local file or directory where the data is to be written. Default: stdout.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--progress', action='store_true',
- help="""
-Display human-readable progress on stderr (bytes and, if possible,
-percentage of total data size). This is the default behavior when it
-is not expected to interfere with the output: specifically, stderr is
-a tty _and_ either stdout is not a tty, or output is being written to
-named files rather than stdout.
-""")
-group.add_argument('--no-progress', action='store_true',
- help="""
-Do not display human-readable progress on stderr.
-""")
-group.add_argument('--batch-progress', action='store_true',
- help="""
-Display machine-readable progress on stderr (bytes and, if known,
-total data size).
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('--hash',
- help="""
-Display the hash of each file as it is read from Keep, using the given
-hash algorithm. Supported algorithms include md5, sha1, sha224,
-sha256, sha384, and sha512.
-""")
-group.add_argument('--md5sum', action='store_const',
- dest='hash', const='md5',
- help="""
-Display the MD5 hash of each file as it is read from Keep.
-""")
-parser.add_argument('-n', action='store_true',
- help="""
-Do not write any data -- just read from Keep, and report md5sums if
-requested.
-""")
-parser.add_argument('-r', action='store_true',
- help="""
-Retrieve all files in the specified collection/prefix. This is the
-default behavior if the "locator" argument ends with a forward slash.
-""")
-group = parser.add_mutually_exclusive_group()
-group.add_argument('-f', action='store_true',
- help="""
-Overwrite existing files while writing. The default behavior is to
-refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to stdout.
-""")
-group.add_argument('--skip-existing', action='store_true',
- help="""
-Skip files that already exist. The default behavior is to refuse to
-write *anything* if any files exist that would have to be
-overwritten. This option causes even devices, sockets, and fifos to be
-skipped.
-""")
-
-args = parser.parse_args()
-
-if args.locator[-1] == os.sep:
- args.r = True
-if (args.r and
- not args.n and
- not (args.destination and
- os.path.isdir(args.destination))):
- parser.error('Destination is not a directory.')
-if not args.r and (os.path.isdir(args.destination) or
- args.destination[-1] == os.path.sep):
- args.destination = os.path.join(args.destination,
- os.path.basename(args.locator))
- logger.debug("Appended source file name to destination directory: %s",
- args.destination)
-
-if args.destination == '/dev/stdout':
- args.destination = "-"
-
-if args.destination == '-':
- # Normally you have to use -f to write to a file (or device) that
- # already exists, but "-" and "/dev/stdout" are common enough to
- # merit a special exception.
- args.f = True
-else:
- args.destination = args.destination.rstrip(os.sep)
-
-# Turn on --progress by default if stderr is a tty and output is
-# either going to a named file, or going (via stdout) to something
-# that isn't a tty.
-if (not (args.batch_progress or args.no_progress)
- and sys.stderr.isatty()
- and (args.destination != '-'
- or not sys.stdout.isatty())):
- args.progress = True
-
-
-r = re.search(r'^(.*?)(/.*)?$', args.locator)
-collection = r.group(1)
-get_prefix = r.group(2)
-if args.r and not get_prefix:
- get_prefix = os.sep
-api_client = arvados.api('v1')
-reader = arvados.CollectionReader(collection, num_retries=args.retries)
-
-if not get_prefix:
- if not args.n:
- open_flags = os.O_CREAT | os.O_WRONLY
- if not args.f:
- open_flags |= os.O_EXCL
- try:
- if args.destination == "-":
- sys.stdout.write(reader.manifest_text())
- else:
- out_fd = os.open(args.destination, open_flags)
- with os.fdopen(out_fd, 'wb') as out_file:
- out_file.write(reader.manifest_text())
- except (IOError, OSError) as error:
- abort("can't write to '{}': {}".format(args.destination, error))
- except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
- abort("failed to download '{}': {}".format(collection, error))
- sys.exit(0)
-
-reader.normalize()
-
-# Scan the collection. Make an array of (stream, file, local
-# destination filename) tuples, and add up total size to extract.
-todo = []
-todo_bytes = 0
-try:
- for s in reader.all_streams():
- for f in s.all_files():
- if get_prefix and get_prefix[-1] == os.sep:
- if 0 != string.find(os.path.join(s.name(), f.name()),
- '.' + get_prefix):
- continue
- if args.destination == "-":
- dest_path = "-"
- else:
- dest_path = os.path.join(
- args.destination,
- os.path.join(s.name(), f.name())[len(get_prefix)+1:])
- if (not (args.n or args.f or args.skip_existing) and
- os.path.exists(dest_path)):
- abort('Local file %s already exists.' % (dest_path,))
- else:
- if os.path.join(s.name(), f.name()) != '.' + get_prefix:
- continue
- dest_path = args.destination
- todo += [(s, f, dest_path)]
- todo_bytes += f.size()
-except arvados.errors.NotFoundError as e:
- abort(e)
-
-# Read data, and (if not -n) write to local file(s) or pipe.
-
-out_bytes = 0
-for s,f,outfilename in todo:
- outfile = None
- digestor = None
- if not args.n:
- if outfilename == "-":
- outfile = sys.stdout
- else:
- if args.skip_existing and os.path.exists(outfilename):
- logger.debug('Local file %s exists. Skipping.', outfilename)
- continue
- elif not args.f and (os.path.isfile(outfilename) or
- os.path.isdir(outfilename)):
- # Good thing we looked again: apparently this file wasn't
- # here yet when we checked earlier.
- abort('Local file %s already exists.' % (outfilename,))
- if args.r:
- arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
- try:
- outfile = open(outfilename, 'wb')
- except Exception as error:
- abort('Open(%s) failed: %s' % (outfilename, error))
- if args.hash:
- digestor = hashlib.new(args.hash)
- try:
- for data in f.readall():
- if outfile:
- outfile.write(data)
- if digestor:
- digestor.update(data)
- out_bytes += len(data)
- if args.progress:
- sys.stderr.write('\r%d MiB / %d MiB %.1f%%' %
- (out_bytes >> 20,
- todo_bytes >> 20,
- (100
- if todo_bytes==0
- else 100.0*out_bytes/todo_bytes)))
- elif args.batch_progress:
- sys.stderr.write('%s %d read %d total\n' %
- (sys.argv[0], os.getpid(),
- out_bytes, todo_bytes))
- if digestor:
- sys.stderr.write("%s %s/%s\n"
- % (digestor.hexdigest(), s.name(), f.name()))
- except KeyboardInterrupt:
- if outfile and (outfile.fileno() > 2) and not outfile.closed:
- os.unlink(outfile.name)
- break
-
-if args.progress:
- sys.stderr.write('\n')
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
commit 154ae0d4b13329bae1033ab99095c00d0b0f66e2
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Thu Mar 30 15:15:20 2017 -0300
7824: Further ordering fixes. Now all arv-ls tests pass.
diff --git a/sdk/python/arvados/commands/ls.py b/sdk/python/arvados/commands/ls.py
index c639585..cff7b55 100755
--- a/sdk/python/arvados/commands/ls.py
+++ b/sdk/python/arvados/commands/ls.py
@@ -60,7 +60,11 @@ def main(args, stdout, stderr, api_client=None):
return 0
def files_in_collection(c, stream_name='.'):
- for i in sorted(c.keys(), key=lambda k: k.upper()):
+ # Sort first by file type, then alphabetically by file path.
+ for i in sorted(c.keys(),
+ key=lambda k: (
+ isinstance(c[k], arvados.collection.Subcollection),
+ k.upper())):
if isinstance(c[i], arvados.arvfile.ArvadosFile):
yield FileInfo(stream_name=stream_name,
name=i,
commit ae7f5a9c869927336dc81cf0552b955457a09647
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Thu Mar 30 12:06:46 2017 -0300
7824: Fix upper/lower case ordering on arv-ls.
diff --git a/sdk/python/arvados/commands/ls.py b/sdk/python/arvados/commands/ls.py
index eded11a..c639585 100755
--- a/sdk/python/arvados/commands/ls.py
+++ b/sdk/python/arvados/commands/ls.py
@@ -60,7 +60,7 @@ def main(args, stdout, stderr, api_client=None):
return 0
def files_in_collection(c, stream_name='.'):
- for i in sorted(c.keys()):
+ for i in sorted(c.keys(), key=lambda k: k.upper()):
if isinstance(c[i], arvados.arvfile.ArvadosFile):
yield FileInfo(stream_name=stream_name,
name=i,
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list