[ARVADOS] created: 2.1.0-2309-gaade42c82

Git user git at public.arvados.org
Thu Apr 14 18:21:44 UTC 2022


        at  aade42c825f24882d9421893a1447f87708b2f33 (commit)


commit aade42c825f24882d9421893a1447f87708b2f33
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Apr 13 16:16:15 2022 -0400

    18994: Add test case, correctly re-stage files when basename is changed.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 4a91a7a83..ba209ade0 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -149,15 +149,16 @@ class ArvPathMapper(PathMapper):
         loc = srcobj["location"]
         if loc.startswith("_:"):
             return True
-        if prefix:
-            if loc != prefix+srcobj["basename"]:
-                return True
-        else:
+        if not prefix:
             i = loc.rfind("/")
             if i > -1:
                 prefix = loc[:i+1]
             else:
                 prefix = loc+"/"
+
+        if loc != prefix+srcobj["basename"]:
+            return True
+
         if srcobj["class"] == "File" and loc not in self._pathmap:
             return True
         for s in srcobj.get("secondaryFiles", []):
@@ -198,6 +199,7 @@ class ArvPathMapper(PathMapper):
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
         for srcobj in referenced_files:
+            print("na na na", srcobj, srcobj["location"].endswith("/"+srcobj["basename"]))
             remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
@@ -217,16 +219,7 @@ class ArvPathMapper(PathMapper):
 
                 ab = self.collection_pattern % c.portable_data_hash()
                 self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
-            elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
-                (srcobj["location"].startswith("_:") and "contents" in srcobj)):
-
-                # If all secondary files/directories are located in
-                # the same collection as the primary file and the
-                # paths and names that are consistent with staging,
-                # don't create a new collection.
-                if not self.needs_new_collection(srcobj):
-                    continue
-
+            elif srcobj["class"] == "File" and self.needs_new_collection(srcobj):
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )
diff --git a/sdk/cwl/tests/18994-basename/check.cwl b/sdk/cwl/tests/18994-basename/check.cwl
new file mode 100644
index 000000000..0046ce66c
--- /dev/null
+++ b/sdk/cwl/tests/18994-basename/check.cwl
@@ -0,0 +1,22 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: CommandLineTool
+cwlVersion: v1.2
+inputs:
+  p: File
+  checkname: string
+outputs: []
+arguments:
+  - sh
+  - "-c"
+  - |
+    name=`basename $(inputs.p.path)`
+    ls -l $(inputs.p.path)
+    if test $name = $(inputs.checkname) ; then
+      echo success
+    else
+      echo expected basename to be $(inputs.checkname) but was $name
+      exit 1
+    fi
diff --git a/sdk/cwl/tests/18994-basename/rename.cwl b/sdk/cwl/tests/18994-basename/rename.cwl
new file mode 100644
index 000000000..026555973
--- /dev/null
+++ b/sdk/cwl/tests/18994-basename/rename.cwl
@@ -0,0 +1,16 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: ExpressionTool
+cwlVersion: v1.2
+inputs:
+  f1: File
+  newname: string
+outputs:
+  out: File
+expression: |
+  ${
+  inputs.f1.basename = inputs.newname;
+  return {"out": inputs.f1};
+  }
diff --git a/sdk/cwl/tests/18994-basename/wf_ren.cwl b/sdk/cwl/tests/18994-basename/wf_ren.cwl
new file mode 100644
index 000000000..b0177494e
--- /dev/null
+++ b/sdk/cwl/tests/18994-basename/wf_ren.cwl
@@ -0,0 +1,33 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: Workflow
+cwlVersion: v1.2
+inputs:
+  f1:
+    type: File
+    default:
+      class: File
+      location: whale.txt
+  newname:
+    type: string
+    default:  "badger.txt"
+outputs: []
+requirements:
+  StepInputExpressionRequirement: {}
+  InlineJavascriptRequirement: {}
+steps:
+  rename:
+    in:
+      f1: f1
+      newname: newname
+    run: rename.cwl
+    out: [out]
+
+  echo:
+    in:
+      p: rename/out
+      checkname: newname
+    out: []
+    run: check.cwl
diff --git a/sdk/cwl/tests/18994-basename/whale.txt b/sdk/cwl/tests/18994-basename/whale.txt
new file mode 100644
index 000000000..9dfd0a6ab
--- /dev/null
+++ b/sdk/cwl/tests/18994-basename/whale.txt
@@ -0,0 +1,5 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+whale
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index 5282e9392..9e691bdba 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -444,3 +444,8 @@
   output: {}
   tool: chipseq/cwl-packed.json
   doc: "Test issue 18723 - correctly upload two directories with the same basename"
+
+- job: null
+  output: {}
+  tool: 18994-basename/wf_ren.cwl
+  doc: "Test issue 18994 - correctly stage file with modified basename"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list