[ARVADOS] created: a06dc44e3acd3844096ebb5829780877145c4424

Git user git at public.curoverse.com
Fri Sep 29 17:03:12 EDT 2017


        at  a06dc44e3acd3844096ebb5829780877145c4424 (commit)


commit a06dc44e3acd3844096ebb5829780877145c4424
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Sep 28 15:15:10 2017 -0400

    12213: Remapping from source to destination collection needs to apply to files as well.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 213fd21..914ccaa 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -91,18 +91,19 @@ class ArvPathMapper(PathMapper):
             for l in srcobj.get("listing", []):
                 self.visit(l, uploadfiles)
 
-    def addentry(self, obj, c, path, subdirs):
+    def addentry(self, obj, c, path, remap):
         if obj["location"] in self._pathmap:
             src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
             if srcpath == "":
                 srcpath = "."
             c.copy(srcpath, path + "/" + obj["basename"], source_collection=src, overwrite=True)
+            remap.append((obj["location"], path + "/" + obj["basename"]))
             for l in obj.get("secondaryFiles", []):
-                self.addentry(l, c, path, subdirs)
+                self.addentry(l, c, path, remap)
         elif obj["class"] == "Directory":
             for l in obj.get("listing", []):
-                self.addentry(l, c, path + "/" + obj["basename"], subdirs)
-            subdirs.append((obj["location"], path + "/" + obj["basename"]))
+                self.addentry(l, c, path + "/" + obj["basename"], remap)
+            remap.append((obj["location"], path + "/" + obj["basename"]))
         elif obj["location"].startswith("_:") and "contents" in obj:
             with c.open(path + "/" + obj["basename"], "w") as f:
                 f.write(obj["contents"].encode("utf-8"))
@@ -154,13 +155,13 @@ class ArvPathMapper(PathMapper):
             self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
 
         for srcobj in referenced_files:
-            subdirs = []
+            remap = []
             if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries)
                 for l in srcobj.get("listing", []):
-                    self.addentry(l, c, ".", subdirs)
+                    self.addentry(l, c, ".", remap)
 
                 check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
                 if not check["items"]:
@@ -174,7 +175,7 @@ class ArvPathMapper(PathMapper):
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )
-                self.addentry(srcobj, c, ".", subdirs)
+                self.addentry(srcobj, c, ".", remap)
 
                 check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
                 if not check["items"]:
@@ -187,10 +188,13 @@ class ArvPathMapper(PathMapper):
                     ab = self.collection_pattern % c.portable_data_hash()
                     self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
 
-            if subdirs:
-                for loc, sub in subdirs:
-                    # subdirs will all start with "./", strip it off
-                    ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
+            if remap:
+                for loc, sub in remap:
+                    # subdirs start with "./", strip it off
+                    if sub.startswith("./"):
+                        ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
+                    else:
+                        ab = self.file_pattern % (c.portable_data_hash(), sub)
                     self._pathmap[loc] = MapperEnt("keep:%s/%s" % (c.portable_data_hash(), sub[2:]),
                                                    ab, "Directory", True)
 

commit cbe0ab4581008c47ab2dfc569e2494f4fc51f844
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Sep 28 14:44:48 2017 -0400

    12213: Add test case.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/tests/12213-keepref-expr.cwl b/sdk/cwl/tests/12213-keepref-expr.cwl
new file mode 100644
index 0000000..c28f7e8
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-expr.cwl
@@ -0,0 +1,31 @@
+cwlVersion: v1.0
+class: ExpressionTool
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  dir: Directory
+outputs:
+  out: Directory[]
+expression: |
+  ${
+    var samples = {};
+    var pattern = /^(.+)(_S[0-9]{1,3}_)(.+)$/;
+    for (var i = 0; i < inputs.dir.listing.length; i++) {
+      var file = inputs.dir.listing[i];
+      var groups = file.basename.match(pattern);
+      if (groups) {
+        var sampleid = groups[1];
+        if (!samples[sampleid]) {
+          samples[sampleid] = [];
+        }
+        samples[sampleid].push(file);
+      }
+    }
+    var dirs = [];
+    for (var sampleid in samples) {
+      dirs.push({"class": "Directory",
+                 "basename": sampleid,
+                 "listing": samples[sampleid]});
+    }
+    return {"out": dirs};
+  }
\ No newline at end of file
diff --git a/sdk/cwl/tests/12213-keepref-job.yml b/sdk/cwl/tests/12213-keepref-job.yml
new file mode 100644
index 0000000..5c5571a
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-job.yml
@@ -0,0 +1,3 @@
+dir:
+  class: Directory
+  location: samples
\ No newline at end of file
diff --git a/sdk/cwl/tests/12213-keepref-tool.cwl b/sdk/cwl/tests/12213-keepref-tool.cwl
new file mode 100644
index 0000000..4e55875
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-tool.cwl
@@ -0,0 +1,11 @@
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  InlineJavascriptRequirement: {}
+inputs:
+  fastqsdir: Directory
+outputs: []
+baseCommand: [zcat]
+arguments:
+  - $(inputs.fastqsdir.listing[0].path)
+  - $(inputs.fastqsdir.listing[1].path)
diff --git a/sdk/cwl/tests/12213-keepref-wf.cwl b/sdk/cwl/tests/12213-keepref-wf.cwl
new file mode 100644
index 0000000..0c8427a
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-wf.cwl
@@ -0,0 +1,19 @@
+cwlVersion: v1.0
+class: Workflow
+requirements:
+  ScatterFeatureRequirement: {}
+inputs:
+  dir: Directory
+outputs: []
+steps:
+  ex:
+    in:
+      dir: dir
+    out: [out]
+    run: 12213-keepref-expr.cwl
+  tool:
+    in:
+      fastqsdir: ex/out
+    out: []
+    scatter: fastqsdir
+    run: 12213-keepref-tool.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index d3bdefc..35efab4 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -91,3 +91,8 @@
   output: {}
   tool: noreuse.cwl
   doc: "Test arv:ReuseRequirement"
+
+- job: 12213-keepref-job.yml
+  output: {}
+  tool: 12213-keepref-wf.cwl
+  doc: "Test manipulating keep references with expression tools"

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list