[ARVADOS] updated: e9509c501f197fb22875bef48a7fa618371f3ca4
git at public.curoverse.com
git at public.curoverse.com
Mon Aug 4 15:39:26 EDT 2014
Summary of changes:
crunch_scripts/collection-merge | 39 ++++++++++++++------------------------
crunch_scripts/decompress-all.py | 41 +++++++++++++++++++++++++---------------
2 files changed, 40 insertions(+), 40 deletions(-)
via e9509c501f197fb22875bef48a7fa618371f3ca4 (commit)
via 22383b73db60dd00bb5b9ef68b009828b59b968e (commit)
via 1c5b0ee281a30b25bc622565dac2df75f99e4863 (commit)
via 2399e2081ec59c60f6b2ddf47d7235fa30bbd4c7 (commit)
from 92f63fe18f3b6d8e4ee589e7a962d39ed4754e9e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit e9509c501f197fb22875bef48a7fa618371f3ca4
Merge: 22383b7 1c5b0ee
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Aug 4 15:39:05 2014 -0400
Merge branch '3373-improve-gatk3-snv-pipeline' of git.qr1hi.arvadosapi.com:peter into 3373-improve-gatk3-snv-pipeline
commit 22383b73db60dd00bb5b9ef68b009828b59b968e
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Aug 4 15:38:47 2014 -0400
Simplified collection-merge. Added comments to collection-merge and decompress-all.
diff --git a/crunch_scripts/collection-merge b/crunch_scripts/collection-merge
index f16d624..63b63fa 100755
--- a/crunch_scripts/collection-merge
+++ b/crunch_scripts/collection-merge
@@ -1,5 +1,18 @@
#!/usr/bin/env python
+# collection-merge
+#
+# Merge two or more collections together. Can also be used to extract specific
+# files from a collection to produce a new collection.
+#
+# input:
+# An array of collections or collection/file paths in script_parameters["input"]
+#
+# output:
+# A manifest with the collections merged. Duplicate file names will
+# have their contents concatenated in the order that they appear in the input
+# array.
+
import arvados
import md5
import subst
@@ -30,28 +43,4 @@ for c in p["input"]:
if fn in s.files():
merged += s.files()[fn].as_manifest()
-crm = arvados.CollectionReader(merged)
-
-combined = crm.manifest_text(strip=True)
-
-m = hashlib.new('md5')
-m.update(combined)
-
-uuid = "{}+{}".format(m.hexdigest(), len(combined))
-
-collection = arvados.api().collections().create(
- body={
- 'uuid': uuid,
- 'manifest_text': crm.manifest_text(),
- }).execute()
-
-for s in src:
- l = arvados.api().links().create(body={
- "link": {
- "tail_uuid": s,
- "head_uuid": uuid,
- "link_class": "provenance",
- "name": "provided"
- }}).execute()
-
-arvados.current_task().set_output(uuid)
+arvados.current_task().set_output(merged)
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index 07fe2e3..0566ffb 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -1,5 +1,18 @@
#!/usr/bin/env python
+#
+# decompress-all.py
+#
+# Decompress all compressed files in the collection using the "dtrx" tool and
+# produce a new collection with the contents. Uncompressed files
+# are passed through.
+#
+# input:
+# A collection at script_parameters["input"]
+#
+# output:
+# A manifest of the uncompressed contents of the input collection.
+
import arvados
import re
import subprocess
commit 1c5b0ee281a30b25bc622565dac2df75f99e4863
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Aug 4 19:36:00 2014 +0000
Works now
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index 07fe2e3..8a0fb6f 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -3,6 +3,7 @@
import arvados
import re
import subprocess
+import os
arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
input_as_path=True)
@@ -11,30 +12,27 @@ task = arvados.current_task()
input_file = task['parameters']['input']
-result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/.*)?$", input_file)
+result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/[^/]+)$", input_file)
outdir = os.path.join(task.tmpdir, "output")
-os.mkdirs(outdir)
+os.makedirs(outdir)
os.chdir(outdir)
if result != None:
- cr = arvados.CollectionReader(re.group(1))
- streamname = '.'
- if re.group(3) != None:
- streamname += re.group(2)
- filename = re.group(3)[1:]
- else:
- filename = re.group(2)[1:]
+ cr = arvados.CollectionReader(result.group(1))
+ streamname = result.group(3)[1:]
+ filename = result.group(4)[1:]
- os.mkdirs(streamname)
+ subprocess.call(["mkdir", "-p", streamname])
os.chdir(streamname)
streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
- filereader = stream.files()[filename]
- rc = subprocess.call("dtrx", "-r", "-n", arvados.get_task_param_mount('input'))
+ filereader = streamreader.files()[filename]
+ rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
if rc == 0:
+ out = arvados.CollectionWriter()
out.write_directory_tree(outdir, max_manifest_depth=0)
- arvados.task_set_output(out.finish())
+ task.set_output(out.finish())
else:
- arvados.task_set_output(streamname + filereader.as_manifest()[1:])
+ task.set_output(streamname + filereader.as_manifest()[1:])
else:
sys.exit(1)
commit 2399e2081ec59c60f6b2ddf47d7235fa30bbd4c7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon Aug 4 15:06:41 2014 -0400
fix parameters
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index a3858d2..07fe2e3 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -9,7 +9,7 @@ arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
task = arvados.current_task()
-input_file = arvados.gettaskparam('input')
+input_file = task['parameters']['input']
result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/.*)?$", input_file)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list