[arvados] created: 2.6.0-80-g7ac1e7337
git repository hosting
git at public.arvados.org
Mon May 1 14:58:57 UTC 2023
at 7ac1e7337dca972ce93d04e5eccb6f4de9aed141 (commit)
commit 7ac1e7337dca972ce93d04e5eccb6f4de9aed141
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon May 1 10:58:25 2023 -0400
20462: Refactor "common prefix" function & fix fencepost errors
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index 895676565..bd32e9f81 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -235,6 +235,7 @@ def fix_schemadef(req, baseuri, urlexpander, merged_map, jobmapper, pdh):
merged_map[mm].resolved[r] = rename
return req
+
def drop_ids(d):
if isinstance(d, MutableSequence):
for i, s in enumerate(d):
@@ -247,6 +248,32 @@ def drop_ids(d):
drop_ids(d[field])
+def common_prefix(firstfile, all_files):
+ n = 0
+ allmatch = True
+ if not firstfile:
+ return ""
+
+ while allmatch and n < len(firstfile)-1:
+ n += 1
+ for f in all_files:
+ if len(f)-1 < n:
+ n -= 1
+ allmatch = False
+ break
+ if f[n] != firstfile[n]:
+ allmatch = False
+ break
+
+ while n > 0 and firstfile[n] != "/":
+ n -= 1
+
+ if firstfile[n] == "/":
+ n += 1
+
+ return firstfile[:n]
+
+
def upload_workflow(arvRunner, tool, job_order, project_uuid,
runtimeContext,
uuid=None,
@@ -280,22 +307,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
# Find the longest common prefix among all the file names. We'll
# use this to recreate the directory structure in a keep
# collection with correct relative references.
- n = 7
- allmatch = True
- if firstfile:
- while allmatch:
- n += 1
- for f in all_files:
- if len(f)-1 < n:
- n -= 1
- allmatch = False
- break
- if f[n] != firstfile[n]:
- allmatch = False
- break
-
- while firstfile[n] != "/":
- n -= 1
+ prefix = common_prefix(firstfile, all_files)
col = arvados.collection.Collection(api_client=arvRunner.api)
@@ -332,22 +344,22 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
update_refs(result, w, tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, "", "")
# Write the updated file to the collection.
- with col.open(w[n+1:], "wt") as f:
+ with col.open(w[len(prefix):], "wt") as f:
if export_as_json:
json.dump(result, f, indent=4, separators=(',',': '))
else:
yamlloader.dump(result, stream=f)
# Also store a verbatim copy of the original files
- with col.open(os.path.join("original", w[n+1:]), "wt") as f:
+ with col.open(os.path.join("original", w[len(prefix):]), "wt") as f:
f.write(text)
# Upload files referenced by $include directives, these are used
# unchanged and don't need to be updated.
for w in include_files:
- with col.open(w[n+1:], "wb") as f1:
- with col.open(os.path.join("original", w[n+1:]), "wb") as f3:
+ with col.open(w[len(prefix):], "wb") as f1:
+ with col.open(os.path.join("original", w[len(prefix):]), "wb") as f3:
with open(uri_file_path(w), "rb") as f2:
dat = f2.read(65536)
while dat:
@@ -361,7 +373,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid,
if git_info and git_info.get("http://arvados.org/cwl#gitDescribe"):
toolname = "%s (%s)" % (toolname, git_info.get("http://arvados.org/cwl#gitDescribe"))
- toolfile = tool.tool["id"][n+1:]
+ toolfile = tool.tool["id"][len(prefix):]
properties = {
"type": "workflow",
diff --git a/sdk/cwl/tests/test_util.py b/sdk/cwl/tests/test_util.py
index 1209b88d8..183e0fcc6 100644
--- a/sdk/cwl/tests/test_util.py
+++ b/sdk/cwl/tests/test_util.py
@@ -11,6 +11,7 @@ import httplib2
from arvados_cwl.util import *
from arvados.errors import ApiError
+from arvados_cwl.arvworkflow import common_prefix
class MockDateTime(datetime.datetime):
@classmethod
@@ -53,4 +54,19 @@ class TestUtil(unittest.TestCase):
logger = mock.MagicMock()
current_container = get_current_container(api, num_retries=0, logger=logger)
- self.assertEqual(current_container, None)
\ No newline at end of file
+ self.assertEqual(current_container, None)
+
+ def test_common_prefix(self):
+ self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar/baz"]), "file:///foo/")
+ self.assertEqual(common_prefix("file:///foo", ["file:///foo", "file:///foo/bar", "file:///foo/bar/"]), "file:///")
+ self.assertEqual(common_prefix("file:///foo/", ["file:///foo/", "file:///foo/bar", "file:///foo/bar/"]), "file:///foo/")
+ self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+ self.assertEqual(common_prefix("file:///foo/bar/", ["file:///foo/bar/", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+ self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/")
+ self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///nope", "file:///foo/quux/q2"]), "file:///")
+ self.assertEqual(common_prefix("file:///blub/foo", ["file:///blub/foo", "file:///blub/foo/bar", "file:///blub/foo/bar/"]), "file:///blub/")
+
+ # sanity check, the subsequent code strips off the prefix so
+ # just confirm the logic doesn't have a fencepost error
+ prefix = "file:///"
+ self.assertEqual("file:///foo/bar"[len(prefix):], "foo/bar")
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list