[ARVADOS] updated: e9509c501f197fb22875bef48a7fa618371f3ca4

git at public.curoverse.com git at public.curoverse.com
Mon Aug 4 15:39:26 EDT 2014


Summary of changes:
 crunch_scripts/collection-merge  | 39 ++++++++++++++------------------------
 crunch_scripts/decompress-all.py | 41 +++++++++++++++++++++++++---------------
 2 files changed, 40 insertions(+), 40 deletions(-)

       via  e9509c501f197fb22875bef48a7fa618371f3ca4 (commit)
       via  22383b73db60dd00bb5b9ef68b009828b59b968e (commit)
       via  1c5b0ee281a30b25bc622565dac2df75f99e4863 (commit)
       via  2399e2081ec59c60f6b2ddf47d7235fa30bbd4c7 (commit)
      from  92f63fe18f3b6d8e4ee589e7a962d39ed4754e9e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit e9509c501f197fb22875bef48a7fa618371f3ca4
Merge: 22383b7 1c5b0ee
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Aug 4 15:39:05 2014 -0400

    Merge branch '3373-improve-gatk3-snv-pipeline' of git.qr1hi.arvadosapi.com:peter into 3373-improve-gatk3-snv-pipeline


commit 22383b73db60dd00bb5b9ef68b009828b59b968e
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Aug 4 15:38:47 2014 -0400

    Simplified collection-merge.  Added comments to collection-merge and decompress-all.

diff --git a/crunch_scripts/collection-merge b/crunch_scripts/collection-merge
index f16d624..63b63fa 100755
--- a/crunch_scripts/collection-merge
+++ b/crunch_scripts/collection-merge
@@ -1,5 +1,18 @@
 #!/usr/bin/env python
 
+# collection-merge
+#
+# Merge two or more collections together.  Can also be used to extract specific
+# files from a collection to produce a new collection.
+#
+# input:
+# An array of collections or collection/file paths in script_parameters["input"]
+#
+# output:
+# A manifest with the collections merged.  Duplicate file names will
+# have their contents concatenated in the order that they appear in the input
+# array.
+
 import arvados
 import md5
 import subst
@@ -30,28 +43,4 @@ for c in p["input"]:
                 if fn in s.files():
                     merged += s.files()[fn].as_manifest()
 
-crm = arvados.CollectionReader(merged)
-
-combined = crm.manifest_text(strip=True)
-
-m = hashlib.new('md5')
-m.update(combined)
-
-uuid = "{}+{}".format(m.hexdigest(), len(combined))
-
-collection = arvados.api().collections().create(
-    body={
-        'uuid': uuid,
-        'manifest_text': crm.manifest_text(),
-    }).execute()
-
-for s in src:
-    l = arvados.api().links().create(body={
-        "link": {
-            "tail_uuid": s,
-            "head_uuid": uuid,
-            "link_class": "provenance",
-            "name": "provided"
-        }}).execute()
-
-arvados.current_task().set_output(uuid)
+arvados.current_task().set_output(merged)
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index 07fe2e3..0566ffb 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -1,5 +1,18 @@
 #!/usr/bin/env python
 
+#
+# decompress-all.py
+#
+# Decompress all compressed files in the collection using the "dtrx" tool and
+# produce a new collection with the contents.  Uncompressed files
+# are passed through.
+#
+# input:
+# A collection at script_parameters["input"]
+#
+# output:
+# A manifest of the uncompressed contents of the input collection.
+
 import arvados
 import re
 import subprocess

commit 1c5b0ee281a30b25bc622565dac2df75f99e4863
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Aug 4 19:36:00 2014 +0000

    Works now

diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index 07fe2e3..8a0fb6f 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -3,6 +3,7 @@
 import arvados
 import re
 import subprocess
+import os
 
 arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
                                           input_as_path=True)
@@ -11,30 +12,27 @@ task = arvados.current_task()
 
 input_file = task['parameters']['input']
 
-result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/.*)?$", input_file)
+result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/[^/]+)$", input_file)
 
 outdir = os.path.join(task.tmpdir, "output")
-os.mkdirs(outdir)
+os.makedirs(outdir)
 os.chdir(outdir)
 
 if result != None:
-    cr = arvados.CollectionReader(re.group(1))
-    streamname = '.'
-    if re.group(3) != None:
-        streamname += re.group(2)
-        filename = re.group(3)[1:]
-    else:
-        filename = re.group(2)[1:]
+    cr = arvados.CollectionReader(result.group(1))
+    streamname = result.group(3)[1:]
+    filename = result.group(4)[1:]
 
-    os.mkdirs(streamname)
+    subprocess.call(["mkdir", "-p", streamname])
     os.chdir(streamname)
     streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
-    filereader = stream.files()[filename]
-    rc = subprocess.call("dtrx", "-r", "-n", arvados.get_task_param_mount('input'))
+    filereader = streamreader.files()[filename]
+    rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
     if rc == 0:
+        out = arvados.CollectionWriter()
         out.write_directory_tree(outdir, max_manifest_depth=0)
-        arvados.task_set_output(out.finish())
+        task.set_output(out.finish())
     else:
-        arvados.task_set_output(streamname + filereader.as_manifest()[1:])
+        task.set_output(streamname + filereader.as_manifest()[1:])
 else:
     sys.exit(1)

commit 2399e2081ec59c60f6b2ddf47d7235fa30bbd4c7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Aug 4 15:06:41 2014 -0400

    fix parameters

diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
index a3858d2..07fe2e3 100755
--- a/crunch_scripts/decompress-all.py
+++ b/crunch_scripts/decompress-all.py
@@ -9,7 +9,7 @@ arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
 
 task = arvados.current_task()
 
-input_file = arvados.gettaskparam('input')
+input_file = task['parameters']['input']
 
 result = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)(/.*)?$", input_file)
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list