[ARVADOS] updated: 2.1.0-1165-g3fc0f9610
Git user
git at public.arvados.org
Fri Aug 6 19:56:23 UTC 2021
Summary of changes:
.../test-collection-create.py | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
via 3fc0f9610deaf28c4ffcfce6cbf22a1a5fb0dace (commit)
from 92581ecfd03a92309d9506730e9807eb7a9dd634 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 3fc0f9610deaf28c4ffcfce6cbf22a1a5fb0dace
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date: Fri Aug 6 16:55:16 2021 -0300
17948: Optimizes tool to stop adding data to a big enough manifest.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>
diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py
index ddd5d04ba..12f9ebe02 100644
--- a/tools/test-collection-create/test-collection-create.py
+++ b/tools/test-collection-create/test-collection-create.py
@@ -60,15 +60,24 @@ def get_stream(name, max_filesize, data_loc, args):
stream = "{} {} {}".format(name, data_loc, ' '.join(files))
return stream
-def create_substreams(depth, base_stream_name, max_filesize, data_loc, args):
- streams = [get_stream(base_stream_name, max_filesize, data_loc, args)]
- if depth == 0:
- logger.info("Finished stream {}".format(base_stream_name))
+def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
+ current_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
+ current_size += len(current_stream)
+ streams = [current_stream]
+
+ if current_size >= (128 * 1024 * 1024):
+ logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
+ elif depth == 0:
+ logger.debug("Finished stream {}".format(base_stream_name))
else:
for _ in range(random.randint(1, 10)):
stream_name = base_stream_name+'/'+get_random_name(False)
- streams.extend(
- create_substreams(depth-1, stream_name, max_filesize, data_loc, args))
+ substreams = create_substreams(depth-1, stream_name, max_filesize,
+ data_loc, args, current_size)
+ current_size += sum([len(x) for x in substreams])
+ if current_size >= (128 * 1024 * 1024):
+ break
+ streams.extend(substreams)
return streams
def parse_arguments(arguments):
@@ -85,6 +94,7 @@ def parse_arguments(arguments):
def main(arguments=None):
args = parse_arguments(arguments)
+ logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={})...".format(args.min_files, args.max_files, args.min_depth, args.max_depth))
api = arvados.api('v1', timeout=5*60)
max_filesize = 1024*1024
data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list