[ARVADOS] updated: 4d012b23a4ac88f433986054fc0085ee6714b5b3

Git user git at public.curoverse.com
Mon May 1 17:06:44 EDT 2017


Summary of changes:
 sdk/python/arvados/commands/put.py | 50 ++++++++++----------------------------
 sdk/python/tests/test_arv_put.py   | 19 +++++----------
 2 files changed, 19 insertions(+), 50 deletions(-)

       via  4d012b23a4ac88f433986054fc0085ee6714b5b3 (commit)
      from  59b27bcb7fe510ff351dd9d8f71b1d4b56d131b5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 4d012b23a4ac88f433986054fc0085ee6714b5b3
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Mon May 1 18:05:02 2017 -0300

    11579: Removed feature about not uploading a symlinked dir twice.
    When using the --no-follow-links parameter, symlinked files will also be ignored.
    Updated tests.

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 90a85ba..ddc261f 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -188,12 +188,11 @@ Do not continue interrupted uploads from cached state.
 _group = run_opts.add_mutually_exclusive_group()
 _group.add_argument('--follow-links', action='store_true', default=True,
                     dest='follow_links', help="""
-Traverse directory symlinks (default).
-Multiple symlinks pointing to the same directory will only be followed once.
+Follow file and directory symlinks (default).
 """)
 _group.add_argument('--no-follow-links', action='store_false', dest='follow_links',
                     help="""
-Do not traverse directory symlinks.
+Do not follow file and directory symlinks.
 """)
 
 _group = run_opts.add_mutually_exclusive_group()
@@ -411,7 +410,6 @@ class ArvPutUploadJob(object):
         self.dry_run = dry_run
         self._checkpoint_before_quit = True
         self.follow_links = follow_links
-        self._traversed_links = set()
 
         if not self.use_cache and self.resume:
             raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
@@ -423,21 +421,6 @@ class ArvPutUploadJob(object):
         # Load cached data if any and if needed
         self._setup_state(update_collection)
 
-    def _check_traversed_dir_links(self, root, dirs):
-        """
-        Remove from the 'dirs' list the already traversed directory symlinks,
-        register the new dir symlinks as traversed.
-        """
-        for d in [d for d in dirs if os.path.isdir(os.path.join(root, d)) and
-                  os.path.islink(os.path.join(root, d))]:
-            real_dirpath = os.path.realpath(os.path.join(root, d))
-            if real_dirpath in self._traversed_links:
-                dirs.remove(d)
-                self.logger.warning("Skipping '{}' symlink to directory '{}' because it was already uploaded".format(os.path.join(root, d), real_dirpath))
-            else:
-                self._traversed_links.add(real_dirpath)
-        return dirs
-
     def start(self, save_collection):
         """
         Start supporting thread & file uploading
@@ -459,12 +442,7 @@ class ArvPutUploadJob(object):
                     prefixdir = path = os.path.abspath(path)
                     if prefixdir != '/':
                         prefixdir += '/'
-                    # If following symlinks, avoid recursive traversals
-                    if self.follow_links and os.path.islink(path):
-                        self._traversed_links.add(os.path.realpath(path))
                     for root, dirs, files in os.walk(path, followlinks=self.follow_links):
-                        if self.follow_links:
-                            dirs = self._check_traversed_dir_links(root, dirs)
                         # Make os.walk()'s dir traversing order deterministic
                         dirs.sort()
                         files.sort()
@@ -602,7 +580,12 @@ class ArvPutUploadJob(object):
         output.close()
 
     def _check_file(self, source, filename):
-        """Check if this file needs to be uploaded"""
+        """
+        Check if this file needs to be uploaded
+        """
+        # Ignore symlinks when requested
+        if (not self.follow_links) and os.path.islink(source):
+            return
         resume_offset = 0
         should_upload = False
         new_file_in_cache = False
@@ -841,24 +824,17 @@ class ArvPutUploadJob(object):
 def expected_bytes_for(pathlist, follow_links=True):
     # Walk the given directory trees and stat files, adding up file sizes,
     # so we can display progress as percent
-    linked_dirs = set()
     bytesum = 0
     for path in pathlist:
         if os.path.isdir(path):
             for root, dirs, files in os.walk(path, followlinks=follow_links):
-                if follow_links:
-                    # Skip those linked dirs that were visited more than once.
-                    for d in [x for x in dirs if os.path.islink(os.path.join(root, x))]:
-                        d_realpath = os.path.realpath(os.path.join(root, d))
-                        if d_realpath in linked_dirs:
-                            # Linked dir already visited, skip it.
-                            dirs.remove(d)
-                        else:
-                            # Will only visit this dir once
-                            linked_dirs.add(d_realpath)
                 # Sum file sizes
                 for f in files:
-                    bytesum += os.path.getsize(os.path.join(root, f))
+                    filepath = os.path.join(root, f)
+                    # Ignore symlinked files when requested
+                    if (not follow_links) and os.path.islink(filepath):
+                        continue
+                    bytesum += os.path.getsize(filepath)
         elif not os.path.isfile(path):
             return None
         else:
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index 774e199..2d17cb8 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -271,8 +271,9 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
         self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
         # Temp dir to hold a symlink to other temp dir
         self.tempdir_with_symlink = tempfile.mkdtemp()
-        os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir1'))
-        os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir2'))
+        os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+        os.symlink(os.path.join(self.tempdir, '1'),
+                   os.path.join(self.tempdir_with_symlink, 'linkedfile'))
 
     def tearDown(self):
         super(ArvPutUploadJobTest, self).tearDown()
@@ -284,23 +285,15 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
     def test_symlinks_are_followed_by_default(self):
         cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
         cwriter.start(save_collection=False)
-        self.assertIn('linkeddir1', cwriter.manifest_text())
-        cwriter.destroy_cache()
-
-    def test_symlinks_are_followed_only_once(self):
-        cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
-                                          follow_links=True)
-        cwriter.start(save_collection=False)
-        self.assertIn('linkeddir1', cwriter.manifest_text())
-        self.assertNotIn('linkeddir2', cwriter.manifest_text())
+        self.assertIn('linkeddir', cwriter.manifest_text())
         cwriter.destroy_cache()
 
     def test_symlinks_are_not_followed_when_requested(self):
         cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
                                           follow_links=False)
         cwriter.start(save_collection=False)
-        self.assertNotIn('linkeddir1', cwriter.manifest_text())
-        self.assertNotIn('linkeddir2', cwriter.manifest_text())
+        self.assertNotIn('linkeddir', cwriter.manifest_text())
+        self.assertNotIn('linkedfile', cwriter.manifest_text())
         cwriter.destroy_cache()
 
     def test_passing_nonexistant_path_raise_exception(self):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list