[ARVADOS] created: 900b251f2dec14fba09e5e7a859cd67933946e56

git at public.curoverse.com git at public.curoverse.com
Tue Jan 21 16:37:39 EST 2014


        at  900b251f2dec14fba09e5e7a859cd67933946e56 (commit)


commit 900b251f2dec14fba09e5e7a859cd67933946e56
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Jan 21 16:38:23 2014 -0500

    New tutorial for running external programs

diff --git a/doc/_includes/run-md5sum.py b/doc/_includes/run-md5sum.py
new file mode 100644
index 0000000..31b2ef0
--- /dev/null
+++ b/doc/_includes/run-md5sum.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+import arvados
+
+arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
+this_task = arvados.current_task()
+
+# Read this task's 'input' parameter: the input collection to process
+this_task_input = this_task['parameters']['input']
+
+# Create a CollectionReader to access the contents of that collection
+input_collection = arvados.CollectionReader(this_task_input)
+
+# Get the name of the first file listed in the collection
+input_file = list(input_collection.all_files())[0].name()
+
+# Extract only that file, without decompressing it, into the 'tmp' directory.
+# collection_extract returns the directory that the file was written to.
+input_dir = arvados.util.collection_extract(this_task_input,
+        'tmp',
+        files=[input_file],
+        decompress=False)
+
+# Run the 'md5sum' command on the input file, with the current working
+# directory set to the location the input file was extracted to.
+stdoutdata, stderrdata = arvados.util.run_command(
+        ['md5sum', input_file],
+        cwd=input_dir)
+
+# Save the standard output (stdoutdata) as "md5sum.txt" in the output collection
+out = arvados.CollectionWriter()
+out.set_current_file_name("md5sum.txt")
+out.write(stdoutdata)
+
+this_task.set_output(out.finish())
diff --git a/doc/user/tutorials/running-external-program.textile b/doc/user/tutorials/running-external-program.textile
index 44ae363..f23fae8 100644
--- a/doc/user/tutorials/running-external-program.textile
+++ b/doc/user/tutorials/running-external-program.textile
@@ -6,3 +6,60 @@ title: "Running external programs"
 navorder: 18
 ---
 
+h1. Running external programs
+
+This tutorial demonstrates how to use Crunch to run an external program by writing a wrapper using the Python SDK.
+
+*This tutorial assumes that you are "logged into an Arvados VM instance":{{site.basedoc}}/user/getting_started/ssh-access.html#login, and have a "working environment.":{{site.basedoc}}/user/getting_started/check-environment.html*
+
+Start by entering the @crunch_scripts@ directory of your git repository:
+
+<notextile>
+<pre><code>$ <span class="userinput">cd you/crunch_scripts</span>
+</code></pre>
+</notextile>
+
+Next, using your favorite text editor, create a new file called @run-md5sum.py@ in the @crunch_scripts@ directory.  Add the following code to compute the md5 hash of each file in a collection:
+
+<pre><code class="userinput">{% include run-md5sum.py %}</code></pre>
+
+Make the file executable:
+
+notextile. <pre><code>$ <span class="userinput">chmod +x run-md5sum.py</span></code></pre>
+
+Next, add the file to @git@ staging, commit and push:
+
+<notextile>
+<pre><code>$ <span class="userinput">git add run-md5sum.py</span>
+$ <span class="userinput">git commit -m"run external md5sum program"</span>
+$ <span class="userinput">git push origin master</span>
+</code></pre>
+</notextile>
+
+You should now be able to run your new script using Crunch, with "script" referring to our new "run-md5sum.py" script.
+
+<notextile>
+<pre><code>$ <span class="userinput">cat >the_job <<EOF
+{
+ "script": "run-md5sum.py",
+ "script_version": "you:master",
+ "script_parameters":
+ {
+  "input": "c1bad4b39ca5a924e481008009d94e32+210"
+ }
+}
+EOF</span>
+$ <span class="userinput">arv -h job create --job "$(cat the_job)"</span>
+{
+ ...
+ "uuid":"qr1hi-xxxxx-xxxxxxxxxxxxxxx"
+ ...
+}
+$ <span class="userinput">arv -h job get --uuid qr1hi-xxxxx-xxxxxxxxxxxxxxx</span>
+{
+ ...
+ "output":"4d164b1658c261b9afc6b479130016a3+54",
+ ...
+}
+</code></pre>
+</notextile>

commit 9e31035da243334912a5dc6143a65ac7df389c69
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Tue Jan 21 14:19:37 2014 -0500

    Started tutorial on running external programs

diff --git a/doc/user/tutorials/running-external-program.textile b/doc/user/tutorials/running-external-program.textile
new file mode 100644
index 0000000..44ae363
--- /dev/null
+++ b/doc/user/tutorials/running-external-program.textile
@@ -0,0 +1,8 @@
+---
+layout: default
+navsection: userguide
+navmenu: Tutorials
+title: "Running external programs"
+navorder: 18
+---
+

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list