[ARVADOS] updated: 0c3581ea0f492024a44475aa76fe5c728cbcb38c

Git user git at public.curoverse.com
Sat Jun 24 09:10:43 EDT 2017


Summary of changes:
 .licenseignore                                     |  46 +++
 build/check-copyright-notices                      | 201 ++++++++++++++
 build/{libcloud-pin => libcloud-pin.sh}            |   0
 build/package-build-dockerfiles/Makefile           |   2 +-
 build/package-build-dockerfiles/centos7/Dockerfile |   2 +-
 build/package-build-dockerfiles/debian8/Dockerfile |   2 +-
 .../ubuntu1204/Dockerfile                          |   2 +-
 .../ubuntu1404/Dockerfile                          |   2 +-
 .../ubuntu1604/Dockerfile                          |   2 +-
 build/run-build-packages-python-and-ruby.sh        |   2 +-
 build/run-build-packages.sh                        |   2 +-
 build/run-tests.sh                                 |   6 +-
 doc/start/index.html.textile.liquid.bkup           |  50 ----
 sdk/cwl/arvados_cwl/__init__.py                    |   3 +-
 sdk/cwl/arvados_cwl/arvjob.py                      |   9 +
 sdk/cwl/arvados_cwl/done.py                        |  62 +++--
 sdk/cwl/test_with_arvbox.sh                        |  18 +-
 sdk/cwl/tests/test_job.py                          |  68 ++++-
 sdk/go/arvados/collection_fs.go                    | 235 ++++++++++++++++
 sdk/go/arvados/collection_fs_test.go               | 125 +++++++++
 sdk/go/arvadostest/fixtures.go                     |   3 +
 sdk/go/keepclient/collectionreader.go              |  21 +-
 sdk/python/arvados/commands/put.py                 | 270 +++++++++---------
 sdk/python/tests/test_arv_put.py                   |  79 ++++--
 .../arvados/v1/repositories_controller.rb          |  17 +-
 .../app/controllers/arvados/v1/users_controller.rb |  48 +---
 services/api/app/models/node.rb                    |  29 +-
 services/api/app/models/user.rb                    |  66 +++--
 services/api/lib/can_be_an_owner.rb                |  42 +--
 .../functional/arvados/v1/users_controller_test.rb |  38 ++-
 services/api/test/integration/users_test.rb        |   9 +-
 services/api/test/unit/node_test.rb                |  21 +-
 services/api/test/unit/user_test.rb                |  17 +-
 services/crunch-run/crunchrun.go                   |   4 +-
 services/crunch-run/crunchrun_test.go              |  13 +-
 services/keep-web/cache.go                         |  32 +--
 services/keep-web/cache_test.go                    |  12 +-
 services/keep-web/handler.go                       | 308 ++++++++++++++++-----
 services/keep-web/handler_test.go                  | 106 +++++++
 services/keep-web/server_test.go                   |   4 +-
 .../arvnodeman/computenode/driver/ec2.py           |  19 +-
 services/nodemanager/arvnodeman/daemon.py          |   2 +-
 .../nodemanager/arvnodeman/test/fake_driver.py     |  32 ++-
 services/nodemanager/setup.py                      |   6 +-
 .../tests/test_computenode_driver_ec2.py           |  50 ++++
 services/nodemanager/tests/testutil.py             |   3 +-
 tools/arvbox/bin/arvbox                            |   2 +-
 tools/arvbox/lib/arvbox/docker/Dockerfile.base     |   5 +-
 48 files changed, 1579 insertions(+), 518 deletions(-)
 create mode 100644 .licenseignore
 create mode 100755 build/check-copyright-notices
 rename build/{libcloud-pin => libcloud-pin.sh} (100%)
 delete mode 100644 doc/start/index.html.textile.liquid.bkup
 create mode 100644 sdk/go/arvados/collection_fs.go
 create mode 100644 sdk/go/arvados/collection_fs_test.go

       via  0c3581ea0f492024a44475aa76fe5c728cbcb38c (commit)
       via  abfa5d90a2c7ae6d92b58813afa2d0fb258ca320 (commit)
       via  c2ceb956b2c9a7d5d2e14925cd1b68e332b36ad6 (commit)
       via  91a7feea5074d303cb0eae3f5c0d53b61d37fc81 (commit)
       via  a9cdc576d334dc5cb1a722f2b1c6b2266af8f986 (commit)
       via  b1f94004653da79774a8c9afd641ab7b492398a6 (commit)
       via  f054bc3d7d3d26962e62c2ea7c27214b08e85bb6 (commit)
       via  9e575bce7c0757f270b584d434d2ada5bc98bc3d (commit)
       via  02c1d68ab5eeafffb09482d0432f8c4a6cb6dfca (commit)
       via  9c6aa66e38395f4ca658a258d27fee2c05c595e2 (commit)
       via  eca14e106a73ebe62f5aa0fc3060ade2d42f8e20 (commit)
       via  a249cd98f534ab0a1cba1345b33142f438b7d0f6 (commit)
       via  74b3ad1f061185ca695e8bbead723b5212bbb06a (commit)
       via  04efddf61ee4a0e5c65a72a538fe3f026ae94e8e (commit)
       via  e7dc406d4babf0fae50837c7f0040dc485242e36 (commit)
       via  5598bbc3c71da60b7b7a665b36495b957d6a3c52 (commit)
       via  e2cd53d9007d56e1de4816f6aeab4bd769271162 (commit)
       via  9ca7acc39ed2e1e2100869e17be3e5cff7c835b6 (commit)
       via  cf9874c59fa5a73d395743aaf71555b441161e3e (commit)
       via  62a1c9045b8313cc7e1b6be16ab922e3eff1bcfd (commit)
       via  4db4e97167a25b519581fb5f4ebef2169464333e (commit)
       via  04f9ccc7ca627d41175f44f515e4581b6937f43b (commit)
       via  110b7d2f628aa9b60fc2beecb66ead15cc60660f (commit)
       via  e2a4e065951ea459570ae75dbe2ed4fd4b6d4bd8 (commit)
       via  b89fda1904ec63d637c43831df66b334f6a78377 (commit)
       via  7409b2f682d562cd7ef7bcf558597ae2181ea7c1 (commit)
       via  f77d63e6cfaf7278c1cb0fb05e5a4e3f45320e3a (commit)
       via  a7240d9ad7e1bd676097665fe3fdc1727cbc2e1b (commit)
       via  312b1a86d1ec886f4baec15034ba1600a9cf1ec2 (commit)
       via  490895ac6ab70a5340b80e734b81a2250412635c (commit)
       via  c40b0299b306f2b508e178b2df210f8a3d9d10e7 (commit)
       via  b7f7d35140e9b412b835817d4f8a078271af4fc0 (commit)
       via  51920838d495265bddb71e61dbd8122b1fe0cb6e (commit)
       via  405d6345dcbe1494e2a9202bcb5cdf519e7d3e0a (commit)
       via  021d36f17fe0329e869324d3764eaaf15c3a0771 (commit)
       via  82de7379de9a488a1e5ace7bb854a4c271c466a8 (commit)
       via  1a86e126dcc5d60666d8233a1b458591a6632190 (commit)
       via  050be6a5be43ab503820955dbca2751ca368063c (commit)
       via  eb0012d203974e54023dfcac6e04fd4c2c40270f (commit)
       via  156f7186528045a58628c58f3b4b48fcb3825cd5 (commit)
       via  0bd8810e6ea29ab242472b9dae11c621cf50953c (commit)
       via  3c44c82acb3dbf50acd75f0c979d8a18ee11dbd0 (commit)
       via  510a92b885ff547dd7eecb34093f27a7245f021f (commit)
       via  2b62223c9ba420208b9f293825e7f6ae3f50f95b (commit)
       via  28cf4975bf3a72ff11ab4044a54b434857b1b95e (commit)
       via  803d4fd3eff1d5c0d4cb9793fb06e1117039beae (commit)
       via  2796844aee26155b5c78e7c69830652f51f7342e (commit)
       via  ec7510c680ee2065d6372fef6a340ef754dbe724 (commit)
       via  abf007273ba68c2eb541763e40b19d1703132685 (commit)
       via  9ae2b6b3427f131fc61f574a2061111c9626bf6c (commit)
       via  a4a1652b72a4fc0fb784f08152ef31f97534c76d (commit)
       via  08793025fb951153ce374f8eb4f984ee21f6a2bc (commit)
       via  cd1a869f36d2a04c59fd995b83d12d5a0b529e19 (commit)
       via  e3a2f5a80c1e4bd645e7e8bff27ef94f89ceae3e (commit)
       via  6ab526bb7fef3d7d42ff728fa30444e75de7be38 (commit)
       via  7cdfe579a8ad314cf303a280b0de68b026244748 (commit)
       via  f8f8b9030fd63fd6ccc10d02cd2e4cba6ea685b8 (commit)
       via  3120003dc579730ac67cac8a47f209b14ec748d3 (commit)
       via  d516e20a5cfce7f1f0c78587f3ce847125c98921 (commit)
       via  42677f11798ac78f2898fe5ce9913ff8e14578e8 (commit)
      from  6c1f8e1f0bb615236a999063a56cb26d209fe787 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 0c3581ea0f492024a44475aa76fe5c728cbcb38c
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Sat Jun 24 10:10:06 2017 -0300

    11789: Enhanced help message for --exclude
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 7e961b7..9fa68ec 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -159,8 +159,12 @@ Save the collection with the specified name.
 
 run_opts.add_argument('--exclude', metavar='PATTERN', default=[],
                       action='append', help="""
-Exclude files and directories whose names match the given pattern. You
-can specify multiple patterns by using this argument more than once.
+Exclude files and directories whose names match the given glob pattern. When
+using a path-like pattern like 'subdir/*.txt', all text files inside 'subdir'
+directory, relative to the provided input dirs will be excluded.
+When using a filename pattern like '*.txt', any text file will be excluded
+no matter where is placed.
+You can specify multiple patterns by using this argument more than once.
 """)
 
 _group = run_opts.add_mutually_exclusive_group()
@@ -467,26 +471,32 @@ class ArvPutUploadJob(object):
                 path = os.path.abspath(path)
                 if orig_path[-1:] == os.sep:
                     # When passing a directory reference with a trailing slash,
-                    # its contents should be uploaded directly to the collection's root.
+                    # its contents should be uploaded directly to the
+                    # collection's root.
                     prefixdir = path
                 else:
                     # When passing a directory reference with no trailing slash,
                     # upload the directory to the collection's root.
                     prefixdir = os.path.dirname(path)
                 prefixdir += os.sep
-                for root, dirs, files in os.walk(path, followlinks=self.follow_links):
+                for root, dirs, files in os.walk(path,
+                                                 followlinks=self.follow_links):
                     root_relpath = os.path.relpath(root, path)
+                    if root_relpath == '.':
+                        root_relpath = ''
                     # Exclude files/dirs by full path matching pattern
                     if self.exclude_paths:
                         dirs[:] = filter(
-                            lambda d: not any([pathname_match(os.path.join(root_relpath, d),
-                                                              pat)
-                                               for pat in self.exclude_paths]),
+                            lambda d: not any(
+                                [pathname_match(os.path.join(root_relpath, d),
+                                                pat)
+                                 for pat in self.exclude_paths]),
                             dirs)
                         files = filter(
-                            lambda f: not any([pathname_match(os.path.join(root_relpath, f),
-                                                              pat)
-                                               for pat in self.exclude_paths]),
+                            lambda f: not any(
+                                [pathname_match(os.path.join(root_relpath, f),
+                                                pat)
+                                 for pat in self.exclude_paths]),
                             files)
                     # Exclude files/dirs by name matching pattern
                     if self.exclude_names is not None:
@@ -1012,7 +1022,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                 p_parts = p.split(os.sep)
                 if '.' in p_parts or '..' in p_parts:
                     logger.error(
-                        "Cannot use path patterns that include '.' or '..")
+                        "Cannot use path patterns that include '.' or '..'")
                     sys.exit(1)
                 # Path search pattern
                 exclude_paths.append(p)
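
The enhanced help text above distinguishes two pattern styles. For illustration
only, here is a minimal standalone sketch of how those two styles filter a
directory walk (the walk_with_excludes() helper is hypothetical, not part of
arv-put, and fnmatch is used as an approximation of the component-wise matching
arv-put actually performs):

    import fnmatch
    import os

    def walk_with_excludes(top, name_patterns=(), path_patterns=()):
        """Yield file paths under `top`, relative to it, skipping excluded ones."""
        for root, dirs, files in os.walk(top):
            rel = os.path.relpath(root, top)
            rel = '' if rel == '.' else rel
            for f in files:
                relpath = os.path.join(rel, f)
                # Name-only patterns like '*.txt' match just the file name,
                # no matter where the file is placed.
                if any(fnmatch.fnmatch(f, p) for p in name_patterns):
                    continue
                # Path-like patterns like 'subdir/*.txt' match the path
                # relative to the provided input directory.
                if any(fnmatch.fnmatch(relpath, p) for p in path_patterns):
                    continue
                yield relpath

    # e.g. list(walk_with_excludes('input_dir',
    #                              name_patterns=['*.log'],
    #                              path_patterns=['subdir/*.txt']))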

commit abfa5d90a2c7ae6d92b58813afa2d0fb258ca320
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Fri Jun 23 17:59:06 2017 -0300

    11789: Path exclude patterns validation and fixes.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 048e412..7e961b7 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -907,7 +907,8 @@ _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0],
 # so instead we're using it on every path component.
 def pathname_match(pathname, pattern):
     name = pathname.split(os.sep)
-    pat = pattern.split(os.sep)
+    # Fix patterns like 'some/subdir/' or 'some//subdir'
+    pat = [x for x in pattern.split(os.sep) if x != '']
     if len(name) != len(pat):
         return False
     for i in range(len(name)):
@@ -996,15 +997,23 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     exclude_names = None
     if len(args.exclude) > 0:
         # We're supporting 2 kinds of exclusion patterns:
-        # 1) --exclude '*.jpg'      (file/dir name patterns, will only match the name)
-        # 2) --exclude 'foo/bar'    (file/dir path patterns, will match the entire path,
-        #                            and should be relative to any input dir argument)
+        # 1) --exclude '*.jpg'      (file/dir name patterns, will only match
+        #                            the name)
+        # 2) --exclude 'foo/bar'    (file/dir path patterns, will match the
+        #                            entire path, and should be relative to
+        #                            any input dir argument)
         for p in args.exclude:
             # Only relative paths patterns allowed
             if p.startswith(os.sep):
                 logger.error("Cannot use absolute paths with --exclude")
                 sys.exit(1)
             if os.path.dirname(p):
+                # We don't support of path patterns with '.' or '..'
+                p_parts = p.split(os.sep)
+                if '.' in p_parts or '..' in p_parts:
+                    logger.error(
+                        "Cannot use path patterns that include '.' or '..")
+                    sys.exit(1)
                 # Path search pattern
                 exclude_paths.append(p)
             else:
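
The hunk above only shows the head of pathname_match(); a self-contained sketch
of the whole component-wise matcher follows (the loop body is not shown in the
diff, so completing it with fnmatch is an assumption based on the surrounding
comments):

    import fnmatch
    import os

    def pathname_match(pathname, pattern):
        """Match a relative path against a pattern one component at a time,
        so 'subdir/*.txt' matches 'subdir/a.txt' but not 'other/sub/a.txt'."""
        name = pathname.split(os.sep)
        # Drop empty components so patterns like 'some/subdir/' or
        # 'some//subdir' behave the same as 'some/subdir'.
        pat = [x for x in pattern.split(os.sep) if x != '']
        if len(name) != len(pat):
            return False
        # fnmatch is applied per component because its '*' would otherwise
        # also match path separators.
        return all(fnmatch.fnmatch(name[i], pat[i]) for i in range(len(name)))

    # pathname_match('subdir/a.txt', 'subdir/*.txt')   -> True
    # pathname_match('a/subdir/a.txt', 'subdir/*.txt') -> False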

commit c2ceb956b2c9a7d5d2e14925cd1b68e332b36ad6
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Wed Jun 21 12:12:18 2017 -0300

    11789: Unified the exclude logic by removing expected_bytes_for() and moving
    the upload list code to a new method.
    Updated tests to be in sync with this refactoring.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index ba7ff2b..048e412 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -383,7 +383,7 @@ class ArvPutUploadJob(object):
     }
 
     def __init__(self, paths, resume=True, use_cache=True, reporter=None,
-                 bytes_expected=None, name=None, owner_uuid=None,
+                 name=None, owner_uuid=None,
                  ensure_unique_name=False, num_retries=None,
                  put_threads=None, replication_desired=None,
                  filename=None, update_time=60.0, update_collection=None,
@@ -394,7 +394,9 @@ class ArvPutUploadJob(object):
         self.use_cache = use_cache
         self.update = False
         self.reporter = reporter
-        self.bytes_expected = bytes_expected
+        # This will set to 0 before start counting, if no special files are going
+        # to be read.
+        self.bytes_expected = None
         self.bytes_written = 0
         self.bytes_skipped = 0
         self.name = name
@@ -435,72 +437,97 @@ class ArvPutUploadJob(object):
         # Load cached data if any and if needed
         self._setup_state(update_collection)
 
+        # Build the upload file list, excluding requested files and counting the
+        # bytes expected to be uploaded.
+        self._build_upload_list()
+
+    def _build_upload_list(self):
+        """
+        Scan the requested paths to count file sizes, excluding files & dirs if requested
+        and building the upload file list.
+        """
+        # If there aren't special files to be read, reset total bytes count to zero
+        # to start counting.
+        if not any(filter(lambda p: not (os.path.isfile(p) or os.path.isdir(p)),
+                          self.paths)):
+            self.bytes_expected = 0
+
+        for path in self.paths:
+            # Test for stdin first, in case some file named '-' exist
+            if path == '-':
+                if self.dry_run:
+                    raise ArvPutUploadIsPending()
+                self._write_stdin(self.filename or 'stdin')
+            elif not os.path.exists(path):
+                 raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path))
+            elif os.path.isdir(path):
+                # Use absolute paths on cache index so CWD doesn't interfere
+                # with the caching logic.
+                orig_path = path
+                path = os.path.abspath(path)
+                if orig_path[-1:] == os.sep:
+                    # When passing a directory reference with a trailing slash,
+                    # its contents should be uploaded directly to the collection's root.
+                    prefixdir = path
+                else:
+                    # When passing a directory reference with no trailing slash,
+                    # upload the directory to the collection's root.
+                    prefixdir = os.path.dirname(path)
+                prefixdir += os.sep
+                for root, dirs, files in os.walk(path, followlinks=self.follow_links):
+                    root_relpath = os.path.relpath(root, path)
+                    # Exclude files/dirs by full path matching pattern
+                    if self.exclude_paths:
+                        dirs[:] = filter(
+                            lambda d: not any([pathname_match(os.path.join(root_relpath, d),
+                                                              pat)
+                                               for pat in self.exclude_paths]),
+                            dirs)
+                        files = filter(
+                            lambda f: not any([pathname_match(os.path.join(root_relpath, f),
+                                                              pat)
+                                               for pat in self.exclude_paths]),
+                            files)
+                    # Exclude files/dirs by name matching pattern
+                    if self.exclude_names is not None:
+                        dirs[:] = filter(lambda d: not self.exclude_names.match(d), dirs)
+                        files = filter(lambda f: not self.exclude_names.match(f), files)
+                    # Make os.walk()'s dir traversing order deterministic
+                    dirs.sort()
+                    files.sort()
+                    for f in files:
+                        filepath = os.path.join(root, f)
+                        # Add its size to the total bytes count (if applicable)
+                        if self.follow_links or (not os.path.islink(filepath)):
+                            if self.bytes_expected is not None:
+                                self.bytes_expected += os.path.getsize(filepath)
+                        self._check_file(filepath,
+                                         os.path.join(root[len(prefixdir):], f))
+            else:
+                filepath = os.path.abspath(path)
+                # Add its size to the total bytes count (if applicable)
+                if self.follow_links or (not os.path.islink(filepath)):
+                    if self.bytes_expected is not None:
+                        self.bytes_expected += os.path.getsize(filepath)
+                self._check_file(filepath,
+                                 self.filename or os.path.basename(path))
+        # If dry-mode is on, and got up to this point, then we should notify that
+        # there aren't any file to upload.
+        if self.dry_run:
+            raise ArvPutUploadNotPending()
+        # Remove local_collection's files that don't exist locally anymore, so the
+        # bytes_written count is correct.
+        for f in self.collection_file_paths(self._local_collection,
+                                            path_prefix=""):
+            if f != 'stdin' and f != self.filename and not f in self._file_paths:
+                self._local_collection.remove(f)
+
     def start(self, save_collection):
         """
         Start supporting thread & file uploading
         """
-        if not self.dry_run:
-            self._checkpointer.start()
+        self._checkpointer.start()
         try:
-            for path in self.paths:
-                # Test for stdin first, in case some file named '-' exist
-                if path == '-':
-                    if self.dry_run:
-                        raise ArvPutUploadIsPending()
-                    self._write_stdin(self.filename or 'stdin')
-                elif not os.path.exists(path):
-                     raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path))
-                elif os.path.isdir(path):
-                    # Use absolute paths on cache index so CWD doesn't interfere
-                    # with the caching logic.
-                    orig_path = path
-                    path = os.path.abspath(path)
-                    if orig_path[-1:] == os.sep:
-                        # When passing a directory reference with a trailing slash,
-                        # its contents should be uploaded directly to the collection's root.
-                        prefixdir = path
-                    else:
-                        # When passing a directory reference with no trailing slash,
-                        # upload the directory to the collection's root.
-                        prefixdir = os.path.dirname(path)
-                    prefixdir += os.sep
-                    for root, dirs, files in os.walk(path, followlinks=self.follow_links):
-                        root_relpath = os.path.relpath(root, path)
-                        # Exclude files/dirs by full path matching pattern
-                        if self.exclude_paths:
-                            dirs[:] = filter(
-                                lambda d: not any([pathname_match(os.path.join(root_relpath, d),
-                                                                  pat)
-                                                   for pat in self.exclude_paths]),
-                                dirs)
-                            files = filter(
-                                lambda f: not any([pathname_match(os.path.join(root_relpath, f),
-                                                                  pat)
-                                                   for pat in self.exclude_paths]),
-                                files)
-                        # Exclude files/dirs by name matching pattern
-                        if self.exclude_names is not None:
-                            dirs[:] = filter(lambda d: not self.exclude_names.match(d), dirs)
-                            files = filter(lambda f: not self.exclude_names.match(f), files)
-                        # Make os.walk()'s dir traversing order deterministic
-                        dirs.sort()
-                        files.sort()
-                        for f in files:
-                            self._check_file(os.path.join(root, f),
-                                             os.path.join(root[len(prefixdir):], f))
-                else:
-                    self._check_file(os.path.abspath(path),
-                                     self.filename or os.path.basename(path))
-            # If dry-mode is on, and got up to this point, then we should notify that
-            # there aren't any file to upload.
-            if self.dry_run:
-                raise ArvPutUploadNotPending()
-            # Remove local_collection's files that don't exist locally anymore, so the
-            # bytes_written count is correct.
-            for f in self.collection_file_paths(self._local_collection,
-                                                path_prefix=""):
-                if f != 'stdin' and f != self.filename and not f in self._file_paths:
-                    self._local_collection.remove(f)
             # Update bytes_written from current local collection and
             # report initial progress.
             self._update()
@@ -686,7 +713,13 @@ class ArvPutUploadJob(object):
             should_upload = True
 
         if should_upload:
-            self._files_to_upload.append((source, resume_offset, filename))
+            try:
+                self._files_to_upload.append((source, resume_offset, filename))
+            except ArvPutUploadIsPending:
+                # This could happen when running on dry-mode, close cache file to
+                # avoid locking issues.
+                self._cache_file.close()
+                raise
 
     def _upload_files(self):
         for source, resume_offset, filename in self._files_to_upload:
@@ -865,45 +898,6 @@ class ArvPutUploadJob(object):
             datablocks = self._datablocks_on_item(self._my_collection())
         return datablocks
 
-def expected_bytes_for(pathlist, follow_links=True, exclude={}):
-    # Walk the given directory trees and stat files, adding up file sizes,
-    # so we can display progress as percent
-    bytesum = 0
-    exclude_paths = exclude.get('paths', None)
-    exclude_names = exclude.get('names', None)
-    for path in pathlist:
-        if os.path.isdir(path):
-            for root, dirs, files in os.walk(path, followlinks=follow_links):
-                root_relpath = os.path.relpath(root, path)
-                # Exclude files/dirs by full path matching pattern
-                if exclude_paths is not None:
-                    dirs[:] = filter(
-                        lambda d: not any([pathname_match(os.path.join(root_relpath, d),
-                                                          pat)
-                                           for pat in exclude_paths]),
-                        dirs)
-                    files = filter(
-                        lambda f: not any([pathname_match(os.path.join(root_relpath, f),
-                                                          pat)
-                                           for pat in exclude_paths]),
-                        files)
-                # Exclude files/dirs by name matching pattern
-                if exclude_names is not None:
-                    dirs[:] = filter(lambda d: not exclude_names.match(d), dirs)
-                    files = filter(lambda f: not exclude_names.match(f), files)
-                # Sum file sizes
-                for f in files:
-                    filepath = os.path.join(root, f)
-                    # Ignore symlinked files when requested
-                    if (not follow_links) and os.path.islink(filepath):
-                        continue
-                    bytesum += os.path.getsize(filepath)
-        elif not os.path.isfile(path):
-            return None
-        else:
-            bytesum += os.path.getsize(path)
-    return bytesum
-
 _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0],
                                                             os.getpid())
 
@@ -1029,19 +1023,12 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     # uploaded, the expected bytes calculation can take a moment.
     if args.progress and any([os.path.isdir(f) for f in args.paths]):
         logger.info("Calculating upload size, this could take some time...")
-    bytes_expected = expected_bytes_for(args.paths,
-                                        follow_links=args.follow_links,
-                                        exclude={'paths': exclude_paths,
-                                                 'names': exclude_names})
-
-
     try:
         writer = ArvPutUploadJob(paths = args.paths,
                                  resume = args.resume,
                                  use_cache = args.use_cache,
                                  filename = args.filename,
                                  reporter = reporter,
-                                 bytes_expected = bytes_expected,
                                  num_retries = args.retries,
                                  replication_desired = args.replication,
                                  put_threads = args.threads,
@@ -1069,6 +1056,10 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     except ArvPutUploadNotPending:
         # No files pending for upload
         sys.exit(0)
+    except PathDoesNotExistError as error:
+        logger.error("\n".join([
+            "arv-put: %s" % str(error)]))
+        sys.exit(1)
 
     # Install our signal handler for each code in CAUGHT_SIGNALS, and save
     # the originals.
@@ -1089,16 +1080,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
         logger.error("\n".join([
             "arv-put: %s" % str(error)]))
         sys.exit(1)
-    except ArvPutUploadIsPending:
-        # Dry run check successful, return proper exit code.
-        sys.exit(2)
-    except ArvPutUploadNotPending:
-        # No files pending for upload
-        sys.exit(0)
-    except PathDoesNotExistError as error:
-        logger.error("\n".join([
-            "arv-put: %s" % str(error)]))
-        sys.exit(1)
 
     if args.progress:  # Print newline to split stderr from stdout for humans.
         logger.info("\n")
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index c2eaf12..bfa39b8 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -299,9 +299,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
 
     def test_passing_nonexistant_path_raise_exception(self):
         uuid_str = str(uuid.uuid4())
-        cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
         with self.assertRaises(arv_put.PathDoesNotExistError):
-            cwriter.start(save_collection=False)
+            cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)])
 
     def test_writer_works_without_cache(self):
         cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
@@ -336,7 +335,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             for expect_count in (None, 8):
                 progression, reporter = self.make_progress_tester()
                 cwriter = arv_put.ArvPutUploadJob([f.name],
-                    reporter=reporter, bytes_expected=expect_count)
+                                                  reporter=reporter)
+                cwriter.bytes_expected = expect_count
                 cwriter.start(save_collection=False)
                 cwriter.destroy_cache()
                 self.assertIn((3, expect_count), progression)
@@ -492,23 +492,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
             self.assertGreater(writer.bytes_written, 0)
             self.assertLess(writer.bytes_written,
                             os.path.getsize(self.large_file_name))
-        # Retry the upload using dry_run to check if there is a pending upload
-        writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
-                                          replication_desired=1,
-                                          dry_run=True)
         with self.assertRaises(arv_put.ArvPutUploadIsPending):
-            writer2.start(save_collection=False)
+            # Retry the upload using dry_run to check if there is a pending upload
+            writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+                                              replication_desired=1,
+                                              dry_run=True)
         # Complete the pending upload
         writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
                                           replication_desired=1)
         writer3.start(save_collection=False)
-        # Confirm there's no pending upload with dry_run=True
-        writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
-                                          replication_desired=1,
-                                          dry_run=True)
         with self.assertRaises(arv_put.ArvPutUploadNotPending):
-            writer4.start(save_collection=False)
-        writer4.destroy_cache()
+            # Confirm there's no pending upload with dry_run=True
+            writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+                                              replication_desired=1,
+                                              dry_run=True)
         # Test obvious cases
         with self.assertRaises(arv_put.ArvPutUploadIsPending):
             arv_put.ArvPutUploadJob([self.large_file_name],
@@ -527,21 +524,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
     TEST_SIZE = os.path.getsize(__file__)
 
     def test_expected_bytes_for_file(self):
+        writer = arv_put.ArvPutUploadJob([__file__])
         self.assertEqual(self.TEST_SIZE,
-                          arv_put.expected_bytes_for([__file__]))
+                         writer.bytes_expected)
 
     def test_expected_bytes_for_tree(self):
         tree = self.make_tmpdir()
         shutil.copyfile(__file__, os.path.join(tree, 'one'))
         shutil.copyfile(__file__, os.path.join(tree, 'two'))
+
+        writer = arv_put.ArvPutUploadJob([tree])
         self.assertEqual(self.TEST_SIZE * 2,
-                          arv_put.expected_bytes_for([tree]))
+                         writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([tree, __file__])
         self.assertEqual(self.TEST_SIZE * 3,
-                          arv_put.expected_bytes_for([tree, __file__]))
+                         writer.bytes_expected)
 
     def test_expected_bytes_for_device(self):
-        self.assertIsNone(arv_put.expected_bytes_for(['/dev/null']))
-        self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null']))
+        writer = arv_put.ArvPutUploadJob(['/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
+        writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
+        self.assertIsNone(writer.bytes_expected)
 
 
 class ArvadosPutReportTest(ArvadosBaseTestCase):
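
This commit folds the old expected_bytes_for() calculation into
ArvPutUploadJob._build_upload_list(). A minimal sketch of just the byte-counting
rule it preserves (the expected_bytes() helper below is hypothetical and ignores
the --exclude filtering the real method also applies): the total stays None
whenever any requested path is a special file, and otherwise accumulates regular
file sizes.

    import os

    def expected_bytes(paths, follow_links=True):
        # None means "total size unknown", e.g. when reading /dev/null or stdin.
        if any(not (os.path.isfile(p) or os.path.isdir(p)) for p in paths):
            return None
        total = 0
        for path in paths:
            if os.path.isdir(path):
                for root, dirs, files in os.walk(path, followlinks=follow_links):
                    for f in files:
                        filepath = os.path.join(root, f)
                        # Symlinked files count only when links are followed.
                        if follow_links or not os.path.islink(filepath):
                            total += os.path.getsize(filepath)
            else:
                total += os.path.getsize(path)
        return total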

commit 91a7feea5074d303cb0eae3f5c0d53b61d37fc81
Merge: a9cdc57 b1f9400
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Tue Jun 20 17:41:36 2017 -0300

    11789: Merge branch 'master' into 11789-arvput-exclude-flag
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --cc sdk/python/arvados/commands/put.py
index 4aab5d5,e591656..ba7ff2b
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@@ -451,27 -440,18 +453,35 @@@ class ArvPutUploadJob(object)
                  elif os.path.isdir(path):
                      # Use absolute paths on cache index so CWD doesn't interfere
                      # with the caching logic.
-                     prefixdir = path = os.path.abspath(path)
-                     if prefixdir != '/':
-                         prefixdir += '/'
+                     orig_path = path
+                     path = os.path.abspath(path)
+                     if orig_path[-1:] == os.sep:
+                         # When passing a directory reference with a trailing slash,
+                         # its contents should be uploaded directly to the collection's root.
+                         prefixdir = path
+                     else:
+                         # When passing a directory reference with no trailing slash,
+                         # upload the directory to the collection's root.
+                         prefixdir = os.path.dirname(path)
+                     prefixdir += os.sep
                      for root, dirs, files in os.walk(path, followlinks=self.follow_links):
 +                        root_relpath = os.path.relpath(root, path)
 +                        # Exclude files/dirs by full path matching pattern
 +                        if self.exclude_paths:
 +                            dirs[:] = filter(
 +                                lambda d: not any([pathname_match(os.path.join(root_relpath, d),
 +                                                                  pat)
 +                                                   for pat in self.exclude_paths]),
 +                                dirs)
 +                            files = filter(
 +                                lambda f: not any([pathname_match(os.path.join(root_relpath, f),
 +                                                                  pat)
 +                                                   for pat in self.exclude_paths]),
 +                                files)
 +                        # Exclude files/dirs by name matching pattern
 +                        if self.exclude_names is not None:
 +                            dirs[:] = filter(lambda d: not self.exclude_names.match(d), dirs)
 +                            files = filter(lambda f: not self.exclude_names.match(f), files)
                          # Make os.walk()'s dir traversing order deterministic
                          dirs.sort()
                          files.sort()
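
The merge above keeps the trailing-slash rule for directory arguments. The
helper below is hypothetical, shown only to illustrate how prefixdir maps an
on-disk path to its name inside the collection (it is not actual arv-put code):

    import os

    def collection_path(orig_path, abs_path, abs_filepath):
        """With a trailing slash ('data/') the directory's contents land at the
        collection root; without one ('data') the directory itself does."""
        if orig_path.endswith(os.sep):
            prefixdir = abs_path
        else:
            prefixdir = os.path.dirname(abs_path)
        prefixdir += os.sep
        return abs_filepath[len(prefixdir):]

    # collection_path('data/', '/work/data', '/work/data/a.txt') -> 'a.txt'
    # collection_path('data',  '/work/data', '/work/data/a.txt') -> 'data/a.txt'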

commit a9cdc576d334dc5cb1a722f2b1c6b2266af8f986
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Tue Jun 20 17:24:13 2017 -0300

    11789: Splitted exclude_* arguments on ArvPutUploadJob class. Simplified
    the way exclude_paths list is handled.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 94e4e2d..4aab5d5 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -386,7 +386,7 @@ class ArvPutUploadJob(object):
                  put_threads=None, replication_desired=None,
                  filename=None, update_time=60.0, update_collection=None,
                  logger=logging.getLogger('arvados.arv_put'), dry_run=False,
-                 follow_links=True, exclude={}):
+                 follow_links=True, exclude_paths=[], exclude_names=None):
         self.paths = paths
         self.resume = resume
         self.use_cache = use_cache
@@ -420,7 +420,8 @@ class ArvPutUploadJob(object):
         self.dry_run = dry_run
         self._checkpoint_before_quit = True
         self.follow_links = follow_links
-        self.exclude = exclude
+        self.exclude_paths = exclude_paths
+        self.exclude_names = exclude_names
 
         if not self.use_cache and self.resume:
             raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
@@ -436,8 +437,6 @@ class ArvPutUploadJob(object):
         """
         Start supporting thread & file uploading
         """
-        exclude_paths = self.exclude.get('paths', None)
-        exclude_names = self.exclude.get('names', None)
         if not self.dry_run:
             self._checkpointer.start()
         try:
@@ -458,21 +457,21 @@ class ArvPutUploadJob(object):
                     for root, dirs, files in os.walk(path, followlinks=self.follow_links):
                         root_relpath = os.path.relpath(root, path)
                         # Exclude files/dirs by full path matching pattern
-                        if exclude_paths is not None:
+                        if self.exclude_paths:
                             dirs[:] = filter(
                                 lambda d: not any([pathname_match(os.path.join(root_relpath, d),
                                                                   pat)
-                                                   for pat in exclude_paths]),
+                                                   for pat in self.exclude_paths]),
                                 dirs)
                             files = filter(
                                 lambda f: not any([pathname_match(os.path.join(root_relpath, f),
                                                                   pat)
-                                                   for pat in exclude_paths]),
+                                                   for pat in self.exclude_paths]),
                                 files)
                         # Exclude files/dirs by name matching pattern
-                        if exclude_names is not None:
-                            dirs[:] = filter(lambda d: not exclude_names.match(d), dirs)
-                            files = filter(lambda f: not exclude_names.match(f), files)
+                        if self.exclude_names is not None:
+                            dirs[:] = filter(lambda d: not self.exclude_names.match(d), dirs)
+                            files = filter(lambda f: not self.exclude_names.match(f), files)
                         # Make os.walk()'s dir traversing order deterministic
                         dirs.sort()
                         files.sort()
@@ -988,13 +987,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
         reporter = None
 
     # Setup exclude regex from all the --exclude arguments provided
+    name_patterns = []
+    exclude_paths = []
+    exclude_names = None
     if len(args.exclude) > 0:
         # We're supporting 2 kinds of exclusion patterns:
         # 1) --exclude '*.jpg'      (file/dir name patterns, will only match the name)
         # 2) --exclude 'foo/bar'    (file/dir path patterns, will match the entire path,
         #                            and should be relative to any input dir argument)
-        name_patterns = []
-        path_patterns = []
         for p in args.exclude:
             # Only relative paths patterns allowed
             if p.startswith(os.sep):
@@ -1002,11 +1002,10 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                 sys.exit(1)
             if os.path.dirname(p):
                 # Path search pattern
-                path_patterns.append(p)
+                exclude_paths.append(p)
             else:
                 # Name-only search pattern
                 name_patterns.append(p)
-        exclude_paths = path_patterns if len(path_patterns) > 0 else None
         # For name only matching, we can combine all patterns into a single regexp,
         # for better performance.
         exclude_names = re.compile('|'.join(
@@ -1015,9 +1014,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
         # Show the user the patterns to be used, just in case they weren't specified inside
         # quotes and got changed by the shell expansion.
         logger.info("Exclude patterns: {}".format(args.exclude))
-    else:
-        exclude_paths = None
-        exclude_names = None
 
     # If this is used by a human, and there's at least one directory to be
     # uploaded, the expected bytes calculation can take a moment.
@@ -1046,8 +1042,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
                                  logger=logger,
                                  dry_run=args.dry_run,
                                  follow_links=args.follow_links,
-                                 exclude={'paths': exclude_paths,
-                                          'names': exclude_names})
+                                 exclude_paths=exclude_paths,
+                                 exclude_names=exclude_names)
     except ResumeCacheConflict:
         logger.error("\n".join([
             "arv-put: Another process is already uploading this data.",

-----------------------------------------------------------------------


hooks/post-receive
-- 