[ARVADOS] updated: c1e7255a85dfc2807ba78e1cf9d109d896c80b42

git at public.curoverse.com git at public.curoverse.com
Fri May 30 15:23:03 EDT 2014


Summary of changes:
 .../app/assets/stylesheets/collections.css.scss    |   8 +-
 .../app/controllers/collections_controller.rb      |  42 +++
 apps/workbench/app/models/arvados_api_client.rb    |   8 +-
 .../app/views/collections/_sharing_button.html.erb |  21 ++
 .../app/views/collections/_show_files.html.erb     |   7 +-
 .../app/views/collections/sharing_popup.js.erb     |   1 +
 apps/workbench/app/views/collections/show.html.erb |   8 +-
 .../pipeline_templates/_show_components.html.erb   |   2 +-
 apps/workbench/config/routes.rb                    |   3 +
 .../src/arvados.org/keepclient/keepclient_test.go  |   2 +-
 sdk/go/src/arvados.org/keepclient/support.go       |   7 +
 sdk/python/arvados/collection.py                   | 216 +++++++++--
 .../python/arvados/commands/__init__.py            |   0
 sdk/python/arvados/commands/put.py                 | 399 +++++++++++++++++++++
 sdk/python/arvados/errors.py                       |   2 +
 sdk/python/arvados/keep.py                         |  84 +++++
 sdk/python/bin/arv-put                             | 215 +----------
 sdk/python/tests/arvados_testutil.py               |  61 ++++
 .../{testdata => tests/data}/1000G_ref_manifest    |   0
 sdk/python/{testdata => tests/data}/jlake_manifest |   0
 sdk/python/{ => tests}/run_test_server.py          |  42 ++-
 sdk/python/tests/test_arv-put.py                   | 352 ++++++++++++++++++
 sdk/python/{ => tests}/test_collections.py         | 309 +++++++++++-----
 sdk/python/{ => tests}/test_keep_client.py         |   0
 sdk/python/tests/test_keep_locator.py              |  67 ++++
 sdk/python/{ => tests}/test_pipeline_template.py   |   0
 sdk/python/{ => tests}/test_util.py                |   0
 sdk/python/{ => tests}/test_websockets.py          |   0
 .../v1/api_client_authorizations_controller.rb     |   2 +
 services/api/app/models/arvados_model.rb           |   6 +-
 services/api/lib/has_uuid.rb                       |   6 +-
 services/api/test/fixtures/links.yml               |  14 +
 .../api/test/unit/api_client_authorization_test.rb |  16 +-
 services/api/test/unit/permission_test.rb          |  14 +
 services/fuse/run_test_server.py                   |   1 -
 services/fuse/tests/run_test_server.py             |   1 +
 services/fuse/{ => tests}/test_mount.py            |   0
 .../keep/src/arvados.org/keepproxy/keepproxy.go    |   1 +
 .../src/arvados.org/keepproxy/keepproxy_test.go    |   2 +-
 39 files changed, 1551 insertions(+), 368 deletions(-)
 create mode 100644 apps/workbench/app/views/collections/_sharing_button.html.erb
 create mode 100644 apps/workbench/app/views/collections/sharing_popup.js.erb
 copy apps/workbench/app/mailers/.gitkeep => sdk/python/arvados/commands/__init__.py (100%)
 create mode 100644 sdk/python/arvados/commands/put.py
 create mode 100644 sdk/python/tests/arvados_testutil.py
 rename sdk/python/{testdata => tests/data}/1000G_ref_manifest (100%)
 rename sdk/python/{testdata => tests/data}/jlake_manifest (100%)
 rename sdk/python/{ => tests}/run_test_server.py (91%)
 create mode 100644 sdk/python/tests/test_arv-put.py
 rename sdk/python/{ => tests}/test_collections.py (68%)
 rename sdk/python/{ => tests}/test_keep_client.py (100%)
 create mode 100644 sdk/python/tests/test_keep_locator.py
 rename sdk/python/{ => tests}/test_pipeline_template.py (100%)
 rename sdk/python/{ => tests}/test_util.py (100%)
 rename sdk/python/{ => tests}/test_websockets.py (100%)
 delete mode 120000 services/fuse/run_test_server.py
 create mode 120000 services/fuse/tests/run_test_server.py
 rename services/fuse/{ => tests}/test_mount.py (100%)

       via  c1e7255a85dfc2807ba78e1cf9d109d896c80b42 (commit)
       via  c7ee5e02cae78d3edff6ed393d776c4995441896 (commit)
       via  ffe3cdbc8c37e2b4a4e3ea4f67c1c9ca5d81e2ed (commit)
       via  c5f14dadb8306f55035dc0c54ec2bdd0acbcc705 (commit)
       via  3c68bb92a1f59bd762ac02b72f2e11f3296b40c9 (commit)
       via  0a4f2421e30dc0ae43908310a3328cc8553d7a76 (commit)
       via  8f65fcff76bdc20e5a15368a30b9ce27c262d57f (commit)
       via  83b6a3fdd9a0fe432758dd44acf252bbd129a930 (commit)
       via  06cf8e35c69540ae44890f8e96961e31cb7fbe66 (commit)
       via  347fcb3c09ba4cbcd0385c21b0dd409eebe3b93c (commit)
       via  6d23a7362308b808a10b698c84a022287d1668a6 (commit)
       via  8b7ea9235e5761beab0dbc92d9c400574f672a15 (commit)
       via  8f5755f5ec65da08ffe9dfc6856848ec747a31a8 (commit)
       via  aa384c3f2d8b7d1782ea059e1eb56c15f542a40c (commit)
       via  aa3d77be5c99f5f8724fb010336b3379f97985c3 (commit)
       via  f9eeccd5633231fdd3e4c7b2fb5c694dff137de9 (commit)
       via  475f5ad53cfe66375aa14fb28419c5b393cc946e (commit)
       via  bd7ce9eaf1081f760fe3b13a9b5f3c74dcd44855 (commit)
       via  0a5265697a17a6703f89df12f665068817b90fcf (commit)
       via  609b941f5f1787ea8380518551b63e259d36c657 (commit)
       via  d603426ef3b87a1a84e71122cbf8330074d68fef (commit)
       via  c63dcd2c097233f322e6980815347b0c47f9e5bf (commit)
       via  516d43a18765674c2a5d0f0bc2a4a4a789d4c61c (commit)
       via  0888e3a31a1af2041e316a2e7b3db74af1cea373 (commit)
       via  fe85ec515483d95ac36e00e5a411da9c1f76f2de (commit)
       via  58a78243026ac9a4569d6cc05bf77045bff7ab20 (commit)
       via  283154a1d4ebc745f03abeef96c0571d284d4a70 (commit)
       via  91e7c9058bf1f38ad50008a6fd2397c1e15d33eb (commit)
       via  5d3b5a301e1087531b087e06e9d8d4607ad66917 (commit)
       via  5c6e82a9eec78dea7c3a991aa735cdc5e87c5120 (commit)
       via  6c3086d313afba5e37e6949ecb454519b8d042e4 (commit)
       via  66db17f904a1591c135a1aa196865d8f53aa7632 (commit)
       via  bd4eb5f518a3253f7858128419f51e2a1b25f951 (commit)
       via  73dd64f9c81989aad456f1aee8f0ad909703b859 (commit)
       via  4406d50cdf2ae7b2c3e9846ddcf629ad535cf7fa (commit)
       via  601d191c37c7edcaca8a6176eae736982f750a89 (commit)
       via  ae4db6299b2d255bbc08a4c6fd3e77abcf030fb8 (commit)
       via  fc2b0d0b96456bd260f2f508c2da10d74aba22f2 (commit)
       via  3507e379566beba249c137c5decb062decc24cf2 (commit)
       via  29a07d251a7eecd0ae4965ac5113f9de7da4e6b7 (commit)
       via  878620a8b1827ed3f58e267a89f76c2dbeaa4b65 (commit)
       via  f87dd9682c5c1583e7d908cf8ed7fae6e4a53c7a (commit)
       via  1ee02554ce8b7ec41264f457897a9309ae3fe1a5 (commit)
       via  5cf3dd70bc2f2947ee42afadbced3f3cde81fa16 (commit)
       via  8dbe66a50a101509b921e9eba33f5f392ce6eed1 (commit)
       via  057f835f3c2f28fcaad3baebe9c279a9d6548731 (commit)
       via  3d869c8fd045305bd1933f03f72da153dfd985af (commit)
       via  e9c5c59ee3dc288146eed9ec1e2405e44c99dc8c (commit)
       via  ba1b5732843a1b78c11fc311a52128acc1fb9f6d (commit)
       via  e947468785cfcd8ebd1324ca1b477351a55b10fd (commit)
       via  317064a4ddead0d64d6e312a21d2bb34504aa104 (commit)
       via  82b46502f25b6992c93bfe7689acc095aa447e5b (commit)
       via  19f4e54627a88c115e299fa328acf22504d1ce66 (commit)
       via  ec07cd1ad893c15d94844b3cf2d8d95ae5cfd611 (commit)
       via  747aa7b4d080b4ea95bf7d6d8643c43e70966f33 (commit)
       via  651638a28db20a2016dff02e3baa106ab27ff945 (commit)
       via  1db007eb53d0401a7a0ba168add7c4a094790fa5 (commit)
       via  675794872a5d064cf0a8177d662555c04b0dae51 (commit)
       via  06a0c1d9f2e6cf1d5a9fd00b53071d857252f9fa (commit)
       via  8b030cb82d414bfa0559a205c150f4bfe792caba (commit)
       via  871a7250874ec52543bed51c8b6d14a3ab860eb8 (commit)
       via  c3fd48d0728c140fbe0ab038ad148cfae8104c97 (commit)
       via  35bc4e20adcc706ffdda3b1c9aeed1b34a20c51b (commit)
      from  39b2ed3024860262747fd6622433e0bf7ba39530 (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit c1e7255a85dfc2807ba78e1cf9d109d896c80b42
Merge: 39b2ed3 c7ee5e0
Author: Tim Pierce <twp at curoverse.com>
Date:   Fri May 30 15:22:32 2014 -0400

    Merge branch 'master' into 2755-python-sdk-permissions
    
    Conflicts:
    	sdk/python/bin/arv-put

diff --cc sdk/python/arvados/commands/put.py
index 0000000,7f4d430..255021e
mode 000000,100644..100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@@ -1,0 -1,399 +1,399 @@@
+ #!/usr/bin/env python
+ 
+ # TODO:
+ # --md5sum - display md5 of each file as read from disk
+ 
+ import argparse
+ import arvados
+ import base64
+ import errno
+ import fcntl
+ import hashlib
+ import json
+ import os
+ import signal
+ import sys
+ import tempfile
+ 
+ CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
+ 
+ def parse_arguments(arguments):
+     parser = argparse.ArgumentParser(
+         description='Copy data from the local filesystem to Keep.')
+ 
+     parser.add_argument('paths', metavar='path', type=str, nargs='*',
+                         help="""
+     Local file or directory. Default: read from standard input.
+     """)
+ 
+     parser.add_argument('--max-manifest-depth', type=int, metavar='N',
+                         default=-1, help="""
+     Maximum depth of directory tree to represent in the manifest
+     structure. A directory structure deeper than this will be represented
+     as a single stream in the manifest. If N=0, the manifest will contain
+     a single stream. Default: -1 (unlimited), i.e., exactly one manifest
+     stream per filesystem directory that contains files.
+     """)
+ 
+     group = parser.add_mutually_exclusive_group()
+ 
+     group.add_argument('--as-stream', action='store_true', dest='stream',
+                        help="""
+     Synonym for --stream.
+     """)
+ 
+     group.add_argument('--stream', action='store_true',
+                        help="""
+     Store the file content and display the resulting manifest on
+     stdout. Do not write the manifest to Keep or save a Collection object
+     in Arvados.
+     """)
+ 
+     group.add_argument('--as-manifest', action='store_true', dest='manifest',
+                        help="""
+     Synonym for --manifest.
+     """)
+ 
+     group.add_argument('--in-manifest', action='store_true', dest='manifest',
+                        help="""
+     Synonym for --manifest.
+     """)
+ 
+     group.add_argument('--manifest', action='store_true',
+                        help="""
+     Store the file data and resulting manifest in Keep, save a Collection
+     object in Arvados, and display the manifest locator (Collection uuid)
+     on stdout. This is the default behavior.
+     """)
+ 
+     group.add_argument('--as-raw', action='store_true', dest='raw',
+                        help="""
+     Synonym for --raw.
+     """)
+ 
+     group.add_argument('--raw', action='store_true',
+                        help="""
+     Store the file content and display the data block locators on stdout,
+     separated by commas, with a trailing newline. Do not store a
+     manifest.
+     """)
+ 
+     parser.add_argument('--use-filename', type=str, default=None,
+                         dest='filename', help="""
+     Synonym for --filename.
+     """)
+ 
+     parser.add_argument('--filename', type=str, default=None,
+                         help="""
+     Use the given filename in the manifest, instead of the name of the
+     local file. This is useful when "-" or "/dev/stdin" is given as an
+     input file. It can be used only if there is exactly one path given and
+     it is not a directory. Implies --manifest.
+     """)
+ 
+     group = parser.add_mutually_exclusive_group()
+     group.add_argument('--progress', action='store_true',
+                        help="""
+     Display human-readable progress on stderr (bytes and, if possible,
+     percentage of total data size). This is the default behavior when
+     stderr is a tty.
+     """)
+ 
+     group.add_argument('--no-progress', action='store_true',
+                        help="""
+     Do not display human-readable progress on stderr, even if stderr is a
+     tty.
+     """)
+ 
+     group.add_argument('--batch-progress', action='store_true',
+                        help="""
+     Display machine-readable progress on stderr (bytes and, if known,
+     total data size).
+     """)
+ 
+     group = parser.add_mutually_exclusive_group()
+     group.add_argument('--resume', action='store_true', default=True,
+                        help="""
+     Continue interrupted uploads from cached state (default).
+     """)
+     group.add_argument('--no-resume', action='store_false', dest='resume',
+                        help="""
+     Do not continue interrupted uploads from cached state.
+     """)
+ 
+     args = parser.parse_args(arguments)
+ 
+     if len(args.paths) == 0:
+         args.paths += ['/dev/stdin']
+ 
+     if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
+         if args.filename:
+             parser.error("""
+     --filename argument cannot be used when storing a directory or
+     multiple files.
+     """)
+ 
+     # Turn on --progress by default if stderr is a tty.
+     if (not (args.batch_progress or args.no_progress)
+         and os.isatty(sys.stderr.fileno())):
+         args.progress = True
+ 
+     if args.paths == ['-']:
+         args.paths = ['/dev/stdin']
+         if not args.filename:
+             args.filename = '-'
+ 
+     return args
+ 
+ class ResumeCacheConflict(Exception):
+     pass
+ 
+ 
+ class ResumeCache(object):
+     CACHE_DIR = os.path.expanduser('~/.cache/arvados/arv-put')
+ 
+     @classmethod
+     def setup_user_cache(cls):
+         try:
+             os.makedirs(cls.CACHE_DIR)
+         except OSError as error:
+             if error.errno != errno.EEXIST:
+                 raise
+         else:
+             os.chmod(cls.CACHE_DIR, 0o700)
+ 
+     def __init__(self, file_spec):
+         self.cache_file = open(file_spec, 'a+')
+         self._lock_file(self.cache_file)
+         self.filename = self.cache_file.name
+ 
+     @classmethod
+     def make_path(cls, args):
+         md5 = hashlib.md5()
+         md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
+         realpaths = sorted(os.path.realpath(path) for path in args.paths)
+         md5.update('\0'.join(realpaths))
+         if any(os.path.isdir(path) for path in realpaths):
+             md5.update(str(max(args.max_manifest_depth, -1)))
+         elif args.filename:
+             md5.update(args.filename)
+         return os.path.join(cls.CACHE_DIR, md5.hexdigest())
+ 
+     def _lock_file(self, fileobj):
+         try:
+             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
+         except IOError:
+             raise ResumeCacheConflict("{} locked".format(fileobj.name))
+ 
+     def load(self):
+         self.cache_file.seek(0)
+         return json.load(self.cache_file)
+ 
+     def save(self, data):
+         try:
+             new_cache_fd, new_cache_name = tempfile.mkstemp(
+                 dir=os.path.dirname(self.filename))
+             self._lock_file(new_cache_fd)
+             new_cache = os.fdopen(new_cache_fd, 'r+')
+             json.dump(data, new_cache)
+             os.rename(new_cache_name, self.filename)
+         except (IOError, OSError, ResumeCacheConflict) as error:
+             try:
+                 os.unlink(new_cache_name)
+             except NameError:  # mkstemp failed.
+                 pass
+         else:
+             self.cache_file.close()
+             self.cache_file = new_cache
+ 
+     def close(self):
+         self.cache_file.close()
+ 
+     def destroy(self):
+         try:
+             os.unlink(self.filename)
+         except OSError as error:
+             if error.errno != errno.ENOENT:  # That's what we wanted anyway.
+                 raise
+         self.close()
+ 
+     def restart(self):
+         self.destroy()
+         self.__init__(self.filename)
+ 
+ 
+ class ArvPutCollectionWriter(arvados.ResumableCollectionWriter):
+     STATE_PROPS = (arvados.ResumableCollectionWriter.STATE_PROPS +
+                    ['bytes_written', '_seen_inputs'])
+ 
+     def __init__(self, cache=None, reporter=None, bytes_expected=None):
+         self.bytes_written = 0
+         self._seen_inputs = []
+         self.cache = cache
+         self.reporter = reporter
+         self.bytes_expected = bytes_expected
+         super(ArvPutCollectionWriter, self).__init__()
+ 
+     @classmethod
+     def from_cache(cls, cache, reporter=None, bytes_expected=None):
+         try:
+             state = cache.load()
+             state['_data_buffer'] = [base64.decodestring(state['_data_buffer'])]
+             writer = cls.from_state(state, cache, reporter, bytes_expected)
+         except (TypeError, ValueError,
+                 arvados.errors.StaleWriterStateError) as error:
+             return cls(cache, reporter, bytes_expected)
+         else:
+             return writer
+ 
+     def cache_state(self):
+         if self.cache is None:
+             return
+         state = self.dump_state()
+         # Transform attributes for serialization.
+         for attr, value in state.items():
+             if attr == '_data_buffer':
+                 state[attr] = base64.encodestring(''.join(value))
+             elif hasattr(value, 'popleft'):
+                 state[attr] = list(value)
+         self.cache.save(state)
+ 
+     def report_progress(self):
+         if self.reporter is not None:
+             self.reporter(self.bytes_written, self.bytes_expected)
+ 
+     def flush_data(self):
+         start_buffer_len = self._data_buffer_len
+         start_block_count = self.bytes_written / self.KEEP_BLOCK_SIZE
+         super(ArvPutCollectionWriter, self).flush_data()
+         if self._data_buffer_len < start_buffer_len:  # We actually PUT data.
+             self.bytes_written += (start_buffer_len - self._data_buffer_len)
+             self.report_progress()
+             if (self.bytes_written / self.KEEP_BLOCK_SIZE) > start_block_count:
+                 self.cache_state()
+ 
+     def _record_new_input(self, input_type, source_name, dest_name):
+         # The key needs to be a list because that's what we'll get back
+         # from JSON deserialization.
+         key = [input_type, source_name, dest_name]
+         if key in self._seen_inputs:
+             return False
+         self._seen_inputs.append(key)
+         return True
+ 
+     def write_file(self, source, filename=None):
+         if self._record_new_input('file', source, filename):
+             super(ArvPutCollectionWriter, self).write_file(source, filename)
+ 
+     def write_directory_tree(self,
+                              path, stream_name='.', max_manifest_depth=-1):
+         if self._record_new_input('directory', path, stream_name):
+             super(ArvPutCollectionWriter, self).write_directory_tree(
+                 path, stream_name, max_manifest_depth)
+ 
+ 
+ def expected_bytes_for(pathlist):
+     # Walk the given directory trees and stat files, adding up file sizes,
+     # so we can display progress as percent
+     bytesum = 0
+     for path in pathlist:
+         if os.path.isdir(path):
+             for filename in arvados.util.listdir_recursive(path):
+                 bytesum += os.path.getsize(os.path.join(path, filename))
+         elif not os.path.isfile(path):
+             return None
+         else:
+             bytesum += os.path.getsize(path)
+     return bytesum
+ 
+ _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0],
+                                                             os.getpid())
+ def machine_progress(bytes_written, bytes_expected):
+     return _machine_format.format(
+         bytes_written, -1 if (bytes_expected is None) else bytes_expected)
+ 
+ def human_progress(bytes_written, bytes_expected):
+     if bytes_expected:
+         return "\r{}M / {}M {:.1%} ".format(
+             bytes_written >> 20, bytes_expected >> 20,
+             float(bytes_written) / bytes_expected)
+     else:
+         return "\r{} ".format(bytes_written)
+ 
+ def progress_writer(progress_func, outfile=sys.stderr):
+     def write_progress(bytes_written, bytes_expected):
+         outfile.write(progress_func(bytes_written, bytes_expected))
+     return write_progress
+ 
+ def exit_signal_handler(sigcode, frame):
+     sys.exit(-sigcode)
+ 
+ def main(arguments=None):
+     ResumeCache.setup_user_cache()
+     args = parse_arguments(arguments)
+ 
+     if args.progress:
+         reporter = progress_writer(human_progress)
+     elif args.batch_progress:
+         reporter = progress_writer(machine_progress)
+     else:
+         reporter = None
+ 
+     try:
+         resume_cache = ResumeCache(ResumeCache.make_path(args))
+         if not args.resume:
+             resume_cache.restart()
+     except ResumeCacheConflict:
+         print "arv-put: Another process is already uploading this data."
+         sys.exit(1)
+ 
+     writer = ArvPutCollectionWriter.from_cache(
+         resume_cache, reporter, expected_bytes_for(args.paths))
+ 
+     # Install our signal handler for each code in CAUGHT_SIGNALS, and save
+     # the originals.
+     orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
+                             for sigcode in CAUGHT_SIGNALS}
+ 
+     if writer.bytes_written > 0:  # We're resuming a previous upload.
+         print >>sys.stderr, "\n".join([
+                 "arv-put: Resuming previous upload from last checkpoint.",
+                 "         Use the --no-resume option to start over."])
+         writer.report_progress()
+ 
+     writer.do_queued_work()  # Do work resumed from cache.
+     for path in args.paths:  # Copy file data to Keep.
+         if os.path.isdir(path):
+             writer.write_directory_tree(
+                 path, max_manifest_depth=args.max_manifest_depth)
+         else:
+             writer.start_new_stream()
+             writer.write_file(path, args.filename or os.path.basename(path))
+     writer.finish_current_stream()
+ 
+     if args.progress:  # Print newline to split stderr from stdout for humans.
+         print >>sys.stderr
+ 
+     if args.stream:
+         print writer.manifest_text(),
+     elif args.raw:
+         print ','.join(writer.data_locators())
+     else:
+         # Register the resulting collection in Arvados.
 -        arvados.api().collections().create(
++        collection = arvados.api().collections().create(
+             body={
+                 'uuid': writer.finish(),
+                 'manifest_text': writer.manifest_text(),
+                 },
+             ).execute()
+ 
+         # Print the locator (uuid) of the new collection.
 -        print writer.finish()
++        print collection['uuid']
+ 
+     for sigcode, orig_handler in orig_signal_handlers.items():
+         signal.signal(sigcode, orig_handler)
+ 
+     resume_cache.destroy()
+ 
+ if __name__ == '__main__':
+     main()

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list