[ARVADOS] updated: f6e2ed9520af6a1184160a2008ed13d3b0f2f76f

git at public.curoverse.com git at public.curoverse.com
Mon Jun 30 10:42:48 EDT 2014


Summary of changes:
 crunch_scripts/collection-merge | 58 ++++++++++++++++++++++++++++----
 crunch_scripts/run-command      | 73 +++++++++++++++++++++++++++++++++++++++++
 crunch_scripts/subst.py         | 71 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 195 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 crunch_scripts/collection-merge
 create mode 100755 crunch_scripts/run-command
 create mode 100644 crunch_scripts/subst.py

       via  f6e2ed9520af6a1184160a2008ed13d3b0f2f76f (commit)
      from  826a893776dccc6c491037831e91cdd3ecfce37f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit f6e2ed9520af6a1184160a2008ed13d3b0f2f76f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Jun 30 10:42:39 2014 -0400

    Add 'run-command' generic crunch command wrapper.  refs #2342

diff --git a/crunch_scripts/collection-merge b/crunch_scripts/collection-merge
old mode 100644
new mode 100755
index 50e90f7..f16d624
--- a/crunch_scripts/collection-merge
+++ b/crunch_scripts/collection-merge
@@ -1,13 +1,57 @@
 #!/usr/bin/env python
 
 import arvados
+import md5
+import subst
+import subprocess
+import os
+import hashlib
 
-inputs = arvados.current_job()['script_parameters']['input']
-if not isinstance(inputs, (list,tuple)):
-    inputs = [inputs]
+p = arvados.current_job()['script_parameters']
 
-out_manifest = ''
-for locator in inputs:
-    out_manifest += arvados.CollectionReader(locator).manifest_text()
+merged = ""
+src = []
+for c in p["input"]:
+    c = subst.do_substitution(p, c)
+    i = c.find('/')
+    if i == -1:
+        src.append(c)
+        merged += arvados.CollectionReader(c).manifest_text()
+    else:
+        src.append(c[0:i])
+        cr = arvados.CollectionReader(c[0:i])
+        j = c.rfind('/')
+        stream = c[i+1:j]
+        if stream == "":
+            stream = "."
+        fn = c[(j+1):]
+        for s in cr.all_streams():
+            if s.name() == stream:
+                if fn in s.files():
+                    merged += s.files()[fn].as_manifest()
 
-arvados.current_task().set_output(arvados.Keep.put(out_manifest))
+crm = arvados.CollectionReader(merged)
+
+combined = crm.manifest_text(strip=True)
+
+m = hashlib.new('md5')
+m.update(combined)
+
+uuid = "{}+{}".format(m.hexdigest(), len(combined))
+
+collection = arvados.api().collections().create(
+    body={
+        'uuid': uuid,
+        'manifest_text': crm.manifest_text(),
+    }).execute()
+
+for s in src:
+    l = arvados.api().links().create(body={
+        "link": {
+            "tail_uuid": s,
+            "head_uuid": uuid,
+            "link_class": "provenance",
+            "name": "provided"
+        }}).execute()
+
+arvados.current_task().set_output(uuid)
diff --git a/crunch_scripts/run-command b/crunch_scripts/run-command
new file mode 100755
index 0000000..528baab
--- /dev/null
+++ b/crunch_scripts/run-command
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+import arvados
+import re
+import os
+import subprocess
+import sys
+import shutil
+import subst
+
+os.umask(0077)
+
+t = arvados.current_task().tmpdir
+
+os.chdir(arvados.current_task().tmpdir)
+os.mkdir("tmpdir")
+os.mkdir("output")
+
+os.chdir("output")
+
+if len(arvados.current_task()['parameters']) > 0:
+    p = arvados.current_task()['parameters']
+else:
+    p = arvados.current_job()['script_parameters']
+
+links = []
+
+def sub_link(v):
+    r = os.path.basename(v)
+    os.symlink(os.path.join(os.environ['TASK_KEEPMOUNT'], v) , r)
+    links.append(r)
+    return r
+
+def sub_tmpdir(v):
+    return os.path.join(arvados.current_task().tmpdir, 'tmpdir')
+
+subst.default_subs["link "] = sub_link
+subst.default_subs["tmpdir"] = sub_tmpdir
+
+rcode = 1
+
+try:
+    cmd = []
+    for c in p["command"]:
+        cmd.append(subst.do_substitution(p, c))
+
+    stdoutname = None
+    stdoutfile = None
+    if "stdout" in p:
+        stdoutname = subst.do_substitution(p, p["stdout"])
+        stdoutfile = open(stdoutname, "wb")
+
+    print("Running command: {}{}".format(' '.join(cmd), (" > " + stdoutname) if stdoutname != None else ""))
+
+    rcode = subprocess.call(cmd, stdout=stdoutfile)
+
+except Exception as e:
+    print("Caught exception {}".format(e))
+
+finally:
+    for l in links:
+        os.unlink(l)
+
+    out = arvados.CollectionWriter()
+    out.write_directory_tree(".", max_manifest_depth=0)
+    arvados.current_task().set_output(out.finish())
+
+if rcode == 0:
+    os.chdir("..")
+    shutil.rmtree("tmpdir")
+    shutil.rmtree("output")
+
+sys.exit(rcode)
diff --git a/crunch_scripts/subst.py b/crunch_scripts/subst.py
new file mode 100644
index 0000000..2598e1c
--- /dev/null
+++ b/crunch_scripts/subst.py
@@ -0,0 +1,71 @@
+import os
+import glob
+
+def search(c):
+    DEFAULT = 0
+    DOLLAR = 1
+
+    i = 0
+    state = DEFAULT
+    start = None
+    depth = 0
+    while i < len(c):
+        if c[i] == '\\':
+            i += 1
+        elif state == DEFAULT:
+            if c[i] == '$':
+                state = DOLLAR
+                if depth == 0:
+                    start = i
+            elif c[i] == ')':
+                if depth == 1:
+                    return [start, i]
+                if depth > 0:
+                    depth -= 1
+        elif state == DOLLAR:
+            if c[i] == '(':
+                depth += 1
+            state = DEFAULT
+        i += 1
+    if depth != 0:
+        raise Exception("Substitution error, mismatched parentheses {}".format(c))
+    return None
+
+def sub_file(v):
+    return os.path.join(os.environ['TASK_KEEPMOUNT'], v)
+
+def sub_dir(v):
+    d = os.path.dirname(v)
+    if d == '':
+        d = v
+    return os.path.join(os.environ['TASK_KEEPMOUNT'], d)
+
+def sub_basename(v):
+    return os.path.splitext(os.path.basename(v))[0]
+
+def sub_glob(v):
+    return glob.glob(v)[0]
+
+default_subs = {"file ": sub_file,
+                "dir ": sub_dir,
+                "basename ": sub_basename,
+                "glob ": sub_glob}
+
+def do_substitution(p, c, subs=default_subs):
+    while True:
+        #print("c is", c)
+        m = search(c)
+        if m != None:
+            v = do_substitution(p, c[m[0]+2 : m[1]])
+            var = True
+            for sub in subs:
+                if v.startswith(sub):
+                    r = subs[sub](v[len(sub):])
+                    var = False
+                    break
+            if var:
+                r = p[v]
+
+            c = c[:m[0]] + r + c[m[1]+1:]
+        else:
+            return c

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list