[ARVADOS] created: 1.1.4-257-g490286e
Git user
git at public.curoverse.com
Thu May 10 15:23:35 EDT 2018
at 490286e1fc9266b79967ba71b27801415e631ab0 (commit)
commit 490286e1fc9266b79967ba71b27801415e631ab0
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Thu May 10 14:39:54 2018 -0400
11907: Sort files for deterministic upload in a-c-r
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py
index 831e496..22c345b 100644
--- a/sdk/python/arvados/commands/run.py
+++ b/sdk/python/arvados/commands/run.py
@@ -185,9 +185,12 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
logger.info("$(input) is %s", pathprefix.rstrip('/'))
pdh = "$(input)"
else:
- files = sorted(files, key=lambda x: x.fn)
+ files.sort(key=lambda f: f.fn)
+
if collection is None:
collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
+
+ filtered = []
prev = ""
for f in files:
localpath = os.path.join(pathprefix, f.fn)
@@ -198,10 +201,26 @@ def uploadfiles(files, api, dry_run=False, num_retries=0,
# skip it because it starts with "/tmp/foo/"
continue
prev = localpath
+ filtered.append(f)
+
+ # Sort such that for each directory, files are uploaded
+ # first, then subdirectories.
+ def keyfunc(f):
+ localpath = os.path.join(pathprefix, f.fn)
+ dn, fn = os.path.split(localpath)
+ return (dn, os.path.isdir(localpath), fn)
+
+ filtered.sort(key=keyfunc)
+
+ for f in filtered:
+ localpath = os.path.join(pathprefix, f.fn)
if os.path.isfile(localpath):
write_file(collection, pathprefix, f.fn)
elif os.path.isdir(localpath):
for root, dirs, iterfiles in os.walk(localpath):
+ # Make the directory traversal deterministic
+ dirs.sort()
+ iterfiles.sort()
root = root[len(pathprefix):]
for src in iterfiles:
write_file(collection, pathprefix, os.path.join(root, src))
diff --git a/sdk/python/tests/test_arv_run.py b/sdk/python/tests/test_arv_run.py
index 1afc120..62a6701 100644
--- a/sdk/python/tests/test_arv_run.py
+++ b/sdk/python/tests/test_arv_run.py
@@ -7,6 +7,8 @@ import os
import sys
import tempfile
import unittest
+import random
+import mock
import arvados.commands.run as arv_run
from . import arvados_testutil as tutil
@@ -26,3 +28,16 @@ class ArvRunTestCase(unittest.TestCase, tutil.VersionChecker):
with self.assertRaises(SystemExit):
self.run_arv_run(['--version'])
self.assertVersionOutput(out, err)
+
+ @mock.patch('arvados.commands.run.write_file')
+ def test_uploadfiles(self, write_file_mock):
+ path = os.getcwd()
+ files = [arv_run.statfile('', 'tests/upf/'+s) for s in ('a', 'b', 'b/y', 'c/x', 'd')]
+ random.shuffle(files)
+ mockcol = mock.MagicMock()
+ arv_run.uploadfiles(files, mock.MagicMock(), collection=mockcol)
+ write_file_mock.assert_has_calls([mock.call(mockcol, path+"/tests/upf/", 'a'),
+ mock.call(mockcol, path+"/tests/upf/", 'd'),
+ mock.call(mockcol, path+"/tests/upf/", 'b/x'),
+ mock.call(mockcol, path+"/tests/upf/", 'b/y'),
+ mock.call(mockcol, path+"/tests/upf/", 'c/x')])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list