[ARVADOS] updated: 4d012b23a4ac88f433986054fc0085ee6714b5b3
Git user
git at public.curoverse.com
Mon May 1 17:06:44 EDT 2017
Summary of changes:
sdk/python/arvados/commands/put.py | 50 ++++++++++----------------------------
sdk/python/tests/test_arv_put.py | 19 +++++----------
2 files changed, 19 insertions(+), 50 deletions(-)
via 4d012b23a4ac88f433986054fc0085ee6714b5b3 (commit)
from 59b27bcb7fe510ff351dd9d8f71b1d4b56d131b5 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 4d012b23a4ac88f433986054fc0085ee6714b5b3
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon May 1 18:05:02 2017 -0300
11579: Removed feature about not uploading a symlinked dir twice.
When using the --no-follow-links parameter, symlinked files will also be ignored.
Updated tests.
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 90a85ba..ddc261f 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -188,12 +188,11 @@ Do not continue interrupted uploads from cached state.
_group = run_opts.add_mutually_exclusive_group()
_group.add_argument('--follow-links', action='store_true', default=True,
dest='follow_links', help="""
-Traverse directory symlinks (default).
-Multiple symlinks pointing to the same directory will only be followed once.
+Follow file and directory symlinks (default).
""")
_group.add_argument('--no-follow-links', action='store_false', dest='follow_links',
help="""
-Do not traverse directory symlinks.
+Do not follow file and directory symlinks.
""")
_group = run_opts.add_mutually_exclusive_group()
@@ -411,7 +410,6 @@ class ArvPutUploadJob(object):
self.dry_run = dry_run
self._checkpoint_before_quit = True
self.follow_links = follow_links
- self._traversed_links = set()
if not self.use_cache and self.resume:
raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
@@ -423,21 +421,6 @@ class ArvPutUploadJob(object):
# Load cached data if any and if needed
self._setup_state(update_collection)
- def _check_traversed_dir_links(self, root, dirs):
- """
- Remove from the 'dirs' list the already traversed directory symlinks,
- register the new dir symlinks as traversed.
- """
- for d in [d for d in dirs if os.path.isdir(os.path.join(root, d)) and
- os.path.islink(os.path.join(root, d))]:
- real_dirpath = os.path.realpath(os.path.join(root, d))
- if real_dirpath in self._traversed_links:
- dirs.remove(d)
- self.logger.warning("Skipping '{}' symlink to directory '{}' because it was already uploaded".format(os.path.join(root, d), real_dirpath))
- else:
- self._traversed_links.add(real_dirpath)
- return dirs
-
def start(self, save_collection):
"""
Start supporting thread & file uploading
@@ -459,12 +442,7 @@ class ArvPutUploadJob(object):
prefixdir = path = os.path.abspath(path)
if prefixdir != '/':
prefixdir += '/'
- # If following symlinks, avoid recursive traversals
- if self.follow_links and os.path.islink(path):
- self._traversed_links.add(os.path.realpath(path))
for root, dirs, files in os.walk(path, followlinks=self.follow_links):
- if self.follow_links:
- dirs = self._check_traversed_dir_links(root, dirs)
# Make os.walk()'s dir traversing order deterministic
dirs.sort()
files.sort()
@@ -602,7 +580,12 @@ class ArvPutUploadJob(object):
output.close()
def _check_file(self, source, filename):
- """Check if this file needs to be uploaded"""
+ """
+ Check if this file needs to be uploaded
+ """
+ # Ignore symlinks when requested
+ if (not self.follow_links) and os.path.islink(source):
+ return
resume_offset = 0
should_upload = False
new_file_in_cache = False
@@ -841,24 +824,17 @@ class ArvPutUploadJob(object):
def expected_bytes_for(pathlist, follow_links=True):
# Walk the given directory trees and stat files, adding up file sizes,
# so we can display progress as percent
- linked_dirs = set()
bytesum = 0
for path in pathlist:
if os.path.isdir(path):
for root, dirs, files in os.walk(path, followlinks=follow_links):
- if follow_links:
- # Skip those linked dirs that were visited more than once.
- for d in [x for x in dirs if os.path.islink(os.path.join(root, x))]:
- d_realpath = os.path.realpath(os.path.join(root, d))
- if d_realpath in linked_dirs:
- # Linked dir already visited, skip it.
- dirs.remove(d)
- else:
- # Will only visit this dir once
- linked_dirs.add(d_realpath)
# Sum file sizes
for f in files:
- bytesum += os.path.getsize(os.path.join(root, f))
+ filepath = os.path.join(root, f)
+ # Ignore symlinked files when requested
+ if (not follow_links) and os.path.islink(filepath):
+ continue
+ bytesum += os.path.getsize(filepath)
elif not os.path.isfile(path):
return None
else:
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index 774e199..2d17cb8 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -271,8 +271,9 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
self.arvfile_write = getattr(arvados.arvfile.ArvadosFileWriter, 'write')
# Temp dir to hold a symlink to other temp dir
self.tempdir_with_symlink = tempfile.mkdtemp()
- os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir1'))
- os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir2'))
+ os.symlink(self.tempdir, os.path.join(self.tempdir_with_symlink, 'linkeddir'))
+ os.symlink(os.path.join(self.tempdir, '1'),
+ os.path.join(self.tempdir_with_symlink, 'linkedfile'))
def tearDown(self):
super(ArvPutUploadJobTest, self).tearDown()
@@ -284,23 +285,15 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
def test_symlinks_are_followed_by_default(self):
cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
cwriter.start(save_collection=False)
- self.assertIn('linkeddir1', cwriter.manifest_text())
- cwriter.destroy_cache()
-
- def test_symlinks_are_followed_only_once(self):
- cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
- follow_links=True)
- cwriter.start(save_collection=False)
- self.assertIn('linkeddir1', cwriter.manifest_text())
- self.assertNotIn('linkeddir2', cwriter.manifest_text())
+ self.assertIn('linkeddir', cwriter.manifest_text())
cwriter.destroy_cache()
def test_symlinks_are_not_followed_when_requested(self):
cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
follow_links=False)
cwriter.start(save_collection=False)
- self.assertNotIn('linkeddir1', cwriter.manifest_text())
- self.assertNotIn('linkeddir2', cwriter.manifest_text())
+ self.assertNotIn('linkeddir', cwriter.manifest_text())
+ self.assertNotIn('linkedfile', cwriter.manifest_text())
cwriter.destroy_cache()
def test_passing_nonexistant_path_raise_exception(self):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list