[ARVADOS] updated: 9f2fccd1c01823a762044c8a73e6fa0f7ed9086b
Git user
git at public.curoverse.com
Mon Dec 12 17:33:29 EST 2016
Summary of changes:
sdk/python/arvados/commands/put.py | 48 ++++++++++++++++++++++++++++++++++--
sdk/python/tests/test_arv_put.py | 50 ++++++++++++++++++++++++++++++++++++++
2 files changed, 96 insertions(+), 2 deletions(-)
via 9f2fccd1c01823a762044c8a73e6fa0f7ed9086b (commit)
from 5646f9476220ef151d1811acb4eff88ebe8ef530 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 9f2fccd1c01823a762044c8a73e6fa0f7ed9086b
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Mon Dec 12 19:32:10 2016 -0300
10383: Added --dry-run argument so that arv-put can be used inside a script to check if there is a pending file upload.
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 66e122c..0fc307a 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -52,6 +52,12 @@ Normalize the manifest by re-ordering files and streams after writing
data.
""")
+_group.add_argument('--dry-run', action='store_true', default=False,
+ help="""
+Don't actually upload files, but only check if any file should be
+uploaded. Exit with code=2 when files are pending for upload.
+""")
+
_group = upload_opts.add_mutually_exclusive_group()
_group.add_argument('--as-stream', action='store_true', dest='stream',
@@ -226,9 +232,30 @@ class CollectionUpdateError(Exception):
class ResumeCacheConflict(Exception):
pass
+
class ArvPutArgumentConflict(Exception):
pass
+
+class ArvPutUploadIsPending(Exception):
+ pass
+
+
+class ArvPutUploadNotPending(Exception):
+ pass
+
+
+class FileUploadList(list):
+ def __init__(self, dry_run=False):
+ list.__init__(self)
+ self.dry_run = dry_run
+
+ def append(self, other):
+ if self.dry_run:
+ raise ArvPutUploadIsPending()
+ super(FileUploadList, self).append(other)
+
+
class ResumeCache(object):
CACHE_DIR = '.cache/arvados/arv-put'
@@ -322,7 +349,7 @@ class ArvPutUploadJob(object):
bytes_expected=None, name=None, owner_uuid=None,
ensure_unique_name=False, num_retries=None, replication_desired=None,
filename=None, update_time=20.0, update_collection=None,
- logger=logging.getLogger('arvados.arv_put')):
+ logger=logging.getLogger('arvados.arv_put'), dry_run=False):
self.paths = paths
self.resume = resume
self.use_cache = use_cache
@@ -348,12 +375,17 @@ class ArvPutUploadJob(object):
self._stop_checkpointer = threading.Event()
self._checkpointer = threading.Thread(target=self._update_task)
self._update_task_time = update_time # How many seconds wait between update runs
- self._files_to_upload = []
+ self._files_to_upload = FileUploadList(dry_run=dry_run)
self.logger = logger
+ self.dry_run = dry_run
if not self.use_cache and self.resume:
raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
+ # Check for obvious dry-run responses
+ if self.dry_run and (not self.use_cache or not self.resume):
+ raise ArvPutUploadIsPending()
+
# Load cached data if any and if needed
self._setup_state(update_collection)
@@ -367,6 +399,8 @@ class ArvPutUploadJob(object):
for path in self.paths:
# Test for stdin first, in case some file named '-' exist
if path == '-':
+ if self.dry_run:
+ raise ArvPutUploadIsPending()
self._write_stdin(self.filename or 'stdin')
elif os.path.isdir(path):
# Use absolute paths on cache index so CWD doesn't interfere
@@ -384,6 +418,10 @@ class ArvPutUploadJob(object):
else:
self._check_file(os.path.abspath(path),
self.filename or os.path.basename(path))
+ # If dry-run mode is on and we got up to this point, then we should notify
+ # that there aren't any files to upload.
+ if self.dry_run:
+ raise ArvPutUploadNotPending()
# Update bytes_written from current local collection and
# report initial progress.
self._update()
@@ -805,6 +843,12 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
logger.error("\n".join([
"arv-put: %s" % str(error)]))
sys.exit(1)
+ except ArvPutUploadIsPending:
+ # Dry run check successful, return proper exit code.
+ sys.exit(2)
+ except ArvPutUploadNotPending:
+ # No files pending for upload
+ sys.exit(0)
# Install our signal handler for each code in CAUGHT_SIGNALS, and save
# the originals.
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index bc933e2..f1dfd03 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -397,6 +397,56 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
writer2.destroy_cache()
+ def test_dry_run_feature(self):
+ def wrapped_write(*args, **kwargs):
+ data = args[1]
+ # Exit only on last block
+ if len(data) < arvados.config.KEEP_BLOCK_SIZE:
+ raise SystemExit("Simulated error")
+ return self.arvfile_write(*args, **kwargs)
+
+ with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
+ autospec=True) as mocked_write:
+ mocked_write.side_effect = wrapped_write
+ writer = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ with self.assertRaises(SystemExit):
+ writer.start(save_collection=False)
+ # Confirm that the file was partially uploaded
+ self.assertGreater(writer.bytes_written, 0)
+ self.assertLess(writer.bytes_written,
+ os.path.getsize(self.large_file_name))
+ # Retry the upload using dry_run to check if there is a pending upload
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ writer2.start(save_collection=False)
+ # Complete the pending upload
+ writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ writer3.start(save_collection=False)
+ # Confirm there's no pending upload with dry_run=True
+ writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
+ with self.assertRaises(arv_put.ArvPutUploadNotPending):
+ writer4.start(save_collection=False)
+ writer4.destroy_cache()
+ # Test obvious cases
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True,
+ resume=False,
+ use_cache=False)
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True,
+ resume=False)
+
+
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
TEST_SIZE = os.path.getsize(__file__)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list