[ARVADOS] created: 9c9b52038aa8b9c15f02567d186539fd8794d0f2
Git user
git at public.curoverse.com
Thu Feb 2 17:14:04 EST 2017
at 9c9b52038aa8b9c15f02567d186539fd8794d0f2 (commit)
commit 9c9b52038aa8b9c15f02567d186539fd8794d0f2
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Thu Feb 2 19:10:45 2017 -0300
10932: Replaced the list with a set when checking whether files in the local collection are present in the local file list, so that the resume start time is greatly reduced.
Also, the save_state method was taking too much time on two operations: deepcopy() and json.dump(). Replaced both with just one call to json.dumps() that's a lot faster than json.dump().
This will improve overall performance when uploading big file collections.
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 5b46ba7..2fbac22 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -436,9 +436,11 @@ class ArvPutUploadJob(object):
raise ArvPutUploadNotPending()
# Remove local_collection's files that don't exist locally anymore, so the
# bytes_written count is correct.
+ # Using a set because it is a lot faster than a list in this case
+ file_paths = set(self._file_paths)
for f in self.collection_file_paths(self._local_collection,
path_prefix=""):
- if f != 'stdin' and f != self.filename and not f in self._file_paths:
+ if f != 'stdin' and f != self.filename and not f in file_paths:
self._local_collection.remove(f)
# Update bytes_written from current local collection and
# report initial progress.
@@ -703,12 +705,12 @@ class ArvPutUploadJob(object):
"""
try:
with self._state_lock:
- state = copy.deepcopy(self._state)
+ state = json.dumps(self._state)
new_cache_fd, new_cache_name = tempfile.mkstemp(
dir=os.path.dirname(self._cache_filename))
self._lock_file(new_cache_fd)
new_cache = os.fdopen(new_cache_fd, 'r+')
- json.dump(state, new_cache)
+ new_cache.write(state)
new_cache.flush()
os.fsync(new_cache)
os.rename(new_cache_name, self._cache_filename)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list