[ARVADOS] updated: 2.1.0-1165-g3fc0f9610

Git user git at public.arvados.org
Fri Aug 6 19:56:23 UTC 2021


Summary of changes:
 .../test-collection-create.py                      | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

       via  3fc0f9610deaf28c4ffcfce6cbf22a1a5fb0dace (commit)
      from  92581ecfd03a92309d9506730e9807eb7a9dd634 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 3fc0f9610deaf28c4ffcfce6cbf22a1a5fb0dace
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Fri Aug 6 16:55:16 2021 -0300

    17948: Optimizes tool to stop adding data to a big enough manifest.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py
index ddd5d04ba..12f9ebe02 100644
--- a/tools/test-collection-create/test-collection-create.py
+++ b/tools/test-collection-create/test-collection-create.py
@@ -60,15 +60,24 @@ def get_stream(name, max_filesize, data_loc, args):
     stream = "{} {} {}".format(name, data_loc, ' '.join(files))
     return stream
 
-def create_substreams(depth, base_stream_name, max_filesize, data_loc, args):
-    streams = [get_stream(base_stream_name, max_filesize, data_loc, args)]
-    if depth == 0:
-        logger.info("Finished stream {}".format(base_stream_name))
+def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
+    current_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
+    current_size += len(current_stream)
+    streams = [current_stream]
+
+    if current_size >= (128 * 1024 * 1024):
+        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
+    elif depth == 0:
+        logger.debug("Finished stream {}".format(base_stream_name))
     else:
         for _ in range(random.randint(1, 10)):
             stream_name = base_stream_name+'/'+get_random_name(False)
-            streams.extend(
-                create_substreams(depth-1, stream_name, max_filesize, data_loc, args))
+            substreams = create_substreams(depth-1, stream_name, max_filesize,
+                data_loc, args, current_size)
+            current_size += sum([len(x) for x in substreams])
+            if current_size >= (128 * 1024 * 1024):
+                break
+            streams.extend(substreams)
     return streams
 
 def parse_arguments(arguments):
@@ -85,6 +94,7 @@ def parse_arguments(arguments):
 
 def main(arguments=None):
     args = parse_arguments(arguments)
+    logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={})...".format(args.min_files, args.max_files, args.min_depth, args.max_depth))
     api = arvados.api('v1', timeout=5*60)
     max_filesize = 1024*1024
     data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)])

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list