[ARVADOS] created: 9c94cfde607cac1264ef4e98128cf87dfb906c47
Git user
git at public.curoverse.com
Thu Sep 28 15:18:13 EDT 2017
at 9c94cfde607cac1264ef4e98128cf87dfb906c47 (commit)
commit 9c94cfde607cac1264ef4e98128cf87dfb906c47
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 28 15:15:10 2017 -0400
12213: Remapping from source to destination collection needs to apply to files as well.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index c8ae77a..cea1d58 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -89,18 +89,19 @@ class ArvPathMapper(PathMapper):
for l in srcobj.get("listing", []):
self.visit(l, uploadfiles)
- def addentry(self, obj, c, path, subdirs):
+ def addentry(self, obj, c, path, remap):
if obj["location"] in self._pathmap:
src, srcpath = self.arvrunner.fs_access.get_collection(self._pathmap[obj["location"]].resolved)
if srcpath == "":
srcpath = "."
c.copy(srcpath, path + "/" + obj["basename"], source_collection=src, overwrite=True)
+ remap.append((obj["location"], path + "/" + obj["basename"]))
for l in obj.get("secondaryFiles", []):
- self.addentry(l, c, path, subdirs)
+ self.addentry(l, c, path, remap)
elif obj["class"] == "Directory":
for l in obj.get("listing", []):
- self.addentry(l, c, path + "/" + obj["basename"], subdirs)
- subdirs.append((obj["location"], path + "/" + obj["basename"]))
+ self.addentry(l, c, path + "/" + obj["basename"], remap)
+ remap.append((obj["location"], path + "/" + obj["basename"]))
elif obj["location"].startswith("_:") and "contents" in obj:
with c.open(path + "/" + obj["basename"], "w") as f:
f.write(obj["contents"].encode("utf-8"))
@@ -152,13 +153,13 @@ class ArvPathMapper(PathMapper):
self._pathmap[loc] = MapperEnt(urllib.quote(fn, "/:+@"), self.collection_pattern % fn[5:], cls, True)
for srcobj in referenced_files:
- subdirs = []
+ remap = []
if srcobj["class"] == "Directory" and srcobj["location"] not in self._pathmap:
c = arvados.collection.Collection(api_client=self.arvrunner.api,
keep_client=self.arvrunner.keep_client,
num_retries=self.arvrunner.num_retries)
for l in srcobj.get("listing", []):
- self.addentry(l, c, ".", subdirs)
+ self.addentry(l, c, ".", remap)
check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
if not check["items"]:
@@ -172,7 +173,7 @@ class ArvPathMapper(PathMapper):
c = arvados.collection.Collection(api_client=self.arvrunner.api,
keep_client=self.arvrunner.keep_client,
num_retries=self.arvrunner.num_retries )
- self.addentry(srcobj, c, ".", subdirs)
+ self.addentry(srcobj, c, ".", remap)
check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
if not check["items"]:
@@ -185,10 +186,13 @@ class ArvPathMapper(PathMapper):
ab = self.collection_pattern % c.portable_data_hash()
self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
- if subdirs:
- for loc, sub in subdirs:
- # subdirs will all start with "./", strip it off
- ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
+ if remap:
+ for loc, sub in remap:
+ # subdirs start with "./", strip it off
+ if sub.startswith("./"):
+ ab = self.file_pattern % (c.portable_data_hash(), sub[2:])
+ else:
+ ab = self.file_pattern % (c.portable_data_hash(), sub)
self._pathmap[loc] = MapperEnt("keep:%s/%s" % (c.portable_data_hash(), sub[2:]),
ab, "Directory", True)
commit f824b454763a25e7d3ecc934d7b04e5c73f9fdae
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 28 14:44:48 2017 -0400
12213: Add test case.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/tests/12213-keepref-expr.cwl b/sdk/cwl/tests/12213-keepref-expr.cwl
new file mode 100644
index 0000000..58f399e
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-expr.cwl
@@ -0,0 +1,32 @@
+cwlVersion: v1.0
+class: ExpressionTool
+requirements:
+ InlineJavascriptRequirement: {}
+inputs:
+ dir: Directory
+outputs:
+ out: Directory[]
+expression: |
+ ${
+ var samples = {};
+ var pattern = /^(.+)(_S[0-9]{1,3}_)(.+)$/;
+ for (var i = 0; i < inputs.dir.listing.length; i++) {
+ var file = inputs.dir.listing[i];
+ var groups = file.basename.match(pattern);
+ if (groups) {
+ var sampleid = groups[1];
+ if (!samples[sampleid]) {
+ samples[sampleid] = [];
+ }
+ samples[sampleid].push(file);
+ }
+ }
+ var dirs = [];
+ var pattern = /Sample_([\w]+)/;
+ for (var sampleid in samples) {
+ dirs.push({"class": "Directory",
+ "basename": sampleid,
+ "listing": samples[sampleid]});
+ }
+ return {"out": dirs};
+ }
\ No newline at end of file
diff --git a/sdk/cwl/tests/12213-keepref-job.yml b/sdk/cwl/tests/12213-keepref-job.yml
new file mode 100644
index 0000000..5c5571a
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-job.yml
@@ -0,0 +1,3 @@
+dir:
+ class: Directory
+ location: samples
\ No newline at end of file
diff --git a/sdk/cwl/tests/12213-keepref-tool.cwl b/sdk/cwl/tests/12213-keepref-tool.cwl
new file mode 100644
index 0000000..cf357c9
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-tool.cwl
@@ -0,0 +1,31 @@
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ InlineJavascriptRequirement: {}
+inputs:
+ fastqsdir: Directory
+outputs: []
+baseCommand: [zcat]
+arguments:
+ - valueFrom: |
+ ${
+ var pattern = /_R1_001.fastq.gz/;
+ var list = inputs.fastqsdir.listing;
+ for (var i = 0; i < list.length; i++) {
+ var file = list[i].path;
+ if (file.search(pattern) != -1) {
+ return file;
+ }
+ }
+ }
+ - valueFrom: |
+ ${
+ var pattern = /_R3_001.fastq.gz/;
+ var list = inputs.fastqsdir.listing;
+ for (var i = 0; i < list.length; i++) {
+ var file = list[i].path;
+ if (file.search(pattern) != -1) {
+ return file;
+ }
+ }
+ }
diff --git a/sdk/cwl/tests/12213-keepref-wf.cwl b/sdk/cwl/tests/12213-keepref-wf.cwl
new file mode 100644
index 0000000..0c8427a
--- /dev/null
+++ b/sdk/cwl/tests/12213-keepref-wf.cwl
@@ -0,0 +1,19 @@
+cwlVersion: v1.0
+class: Workflow
+requirements:
+ ScatterFeatureRequirement: {}
+inputs:
+ dir: Directory
+outputs: []
+steps:
+ ex:
+ in:
+ dir: dir
+ out: [out]
+ run: 12213-keepref-expr.cwl
+ tool:
+ in:
+ fastqsdir: ex/out
+ out: []
+ scatter: fastqsdir
+ run: 12213-keepref-tool.cwl
\ No newline at end of file
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index d3bdefc..35efab4 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -91,3 +91,8 @@
output: {}
tool: noreuse.cwl
doc: "Test arv:ReuseRequirement"
+
+- job: 12213-keepref-job.yml
+ output: {}
+ tool: 12213-keepref-wf.cwl
+ doc: "Test manipulating keep references with expression tools"
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list