[ARVADOS] updated: a0c099f41a00785b6d28a105e49f40e713e78882
git at public.curoverse.com
git at public.curoverse.com
Fri Oct 17 11:00:05 EDT 2014
Summary of changes:
doc/user/topics/arv-run.html.textile.liquid | 127 +++++++++++++++++++++-------
sdk/python/arvados/commands/run.py | 105 +++++++++++++++++------
2 files changed, 173 insertions(+), 59 deletions(-)
via a0c099f41a00785b6d28a105e49f40e713e78882 (commit)
from d071c34ca20aa86a5a053abcffb7414dbd8f4933 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a0c099f41a00785b6d28a105e49f40e713e78882
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Oct 17 10:58:58 2014 -0400
3609: Look for parent project in current working dir and create pipeline in
that project if writable. Added --project-uuid. Uploading no longer uses
arv-put because it doesn't preserve file paths. Exclude command executable
from filename rewriting. Add ability to quote individual arguments. Much more
documentation.
diff --git a/doc/user/topics/arv-run.html.textile.liquid b/doc/user/topics/arv-run.html.textile.liquid
index 0d7d8c1..91c49c8 100644
--- a/doc/user/topics/arv-run.html.textile.liquid
+++ b/doc/user/topics/arv-run.html.textile.liquid
@@ -24,16 +24,18 @@ HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACT
$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC HWI-ST1027_129_D0THKACXX.1_1.fastq</span>
Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241
[...]
-Thu Oct 16 17:30:41 2014 qr1hi-8i9sb-8wdaabnughiolpy 13541 0 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
-Thu Oct 16 17:30:41 2014 qr1hi-8i9sb-8wdaabnughiolpy 13541 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
-Thu Oct 16 17:30:41 2014 qr1hi-8i9sb-8wdaabnughiolpy 13541 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
-Thu Oct 16 17:30:41 2014 qr1hi-8i9sb-8wdaabnughiolpy 13541 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
-Thu Oct 16 17:30:42 2014 qr1hi-8i9sb-8wdaabnughiolpy 13541 0 stderr run-command: completed with exit code 0 (success)
+ 0 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
+ 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
+ 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
+ 0 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
+ 0 stderr run-command: completed with exit code 0 (success)
[...]
</pre>
</notextile>
-A key feature of @arv-run@ is the ability to introspect the command line to determine which arguments are file inputs, and transform those paths so they are usable inside the Arvados container. In the above example, @HWI-ST1027_129_D0THKACXX.1_2.fastq@ is transformed into @/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq@. In the above example, @arv-run@ works together with @arv-mount@ to identify that the file is already part of an Arvados collection. In this case, it will use the existing collection without any upload step. If you specify a file that is only available on the local filesystem, @arv-run@ will upload a new collection and use that.
+A key feature of @arv-run@ is the ability to introspect the command line to determine which arguments are file inputs, and transform those paths so they are usable inside the Arvados container. In the above example, @HWI-ST1027_129_D0THKACXX.1_1.fastq@ is transformed into @/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq@. @arv-run@ also works together with @arv-mount@ to identify that the file is already part of an Arvados collection. In this case, it will use the existing collection without any upload step. If you specify a file that is only available on the local filesystem, @arv-run@ will upload a new collection.
+
+If you find that @arv-run@ is incorrectly rewriting one of your command line arguments, place a backslash @\@ at the beginning of the affected argument to quote it (suppress rewriting).
h2. Parallel tasks
@@ -45,57 +47,120 @@ HWI-ST1027_129_D0THKACXX.1_1.fastq HWI-ST1027_129_D0THKACXX.1_2.fastq
$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC -- *.fastq</span>
Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241
[...]
-Thu Oct 16 19:27:42 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 0 stderr run-command: parallelizing on input0 with items [u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq', u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq']
+ 0 stderr run-command: parallelizing on input0 with items [u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq', u'/keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq']
[...]
-Thu Oct 16 19:27:45 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
-Thu Oct 16 19:27:46 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq
+ 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq
+ 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq
[...]
-Thu Oct 16 19:27:46 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
-Thu Oct 16 19:27:46 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
-Thu Oct 16 19:27:46 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
-Thu Oct 16 19:27:47 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 1 stderr run-command: completed with exit code 0 (success)
-Thu Oct 16 19:27:47 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 2 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq:34:CTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAG
-Thu Oct 16 19:27:47 2014 qr1hi-8i9sb-r0n6w78aq0knsoj 2331 2 stderr run-command: completed with exit code 0 (success)
+ 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:14:TCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCCCAACCTA
+ 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:18:AACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCT
+ 1 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq:30:ATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACG
+ 1 stderr run-command: completed with exit code 0 (success)
+ 2 stderr /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq:34:CTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGACAGCATCAACTTCTCTCACAACCTAG
+ 2 stderr run-command: completed with exit code 0 (success)
</pre>
</notextile>
-You may use also stdin @<@ redirection on multiple files. This will create a separate task for each input file. Because the syntax is designed to mimic standard shell syntax, it is necessary to quote the metacharacters @<@, @>@ and @|@ as either @\<@, @\>@ and @\|@ or @'<'@, @'>'@ and @'|'@.
+You may specify @--batch-size N@ (or the short form @-bN@) after the @--@ but before listing any files to specify how many files to put on the command line for each task. See "Putting it all together" below for an example.
+
+h2. Redirection
+
+You may use standard input (@<@) and standard output (@>@) redirection. This will create a separate task for each file listed in standard input. You are only permitted to supply a single file name for stdout @>@ redirection. If there are multiple tasks with their output sent to the same file, the output will be collated at the end of the pipeline.
+
+(Note: because the syntax is designed to mimic standard shell syntax, it is necessary to quote the metacharacters @<@, @>@ and @|@ as either @\<@, @\>@ and @\|@ or @'<'@, @'>'@ and @'|'@.)
<notextile>
<pre>
$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> output.txt</span>
+[...]
+ 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > output.txt
+ 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq > output.txt
+ 2 stderr run-command: completed with exit code 0 (success)
+ 2 stderr run-command: the following output files will be saved to keep:
+ 2 stderr run-command: 121 ./output.txt
+ 2 stderr run-command: start writing output to keep
+ 1 stderr run-command: completed with exit code 0 (success)
+ 1 stderr run-command: the following output files will be saved to keep:
+ 1 stderr run-command: 363 ./output.txt
+ 1 stderr run-command: start writing output to keep
+ 2 stderr upload wrote 121 total 121
+ 1 stderr upload wrote 363 total 363
+[..]
</pre>
</notextile>
-You are only permitted to supply a single file name for stdout @>@ redirection. If there are multiple tasks, their output will be collated at the end of the pipeline. Alternately, you may use "run-command":run-command.html parameter substitution in the file name to generate different filenames for each task.
-
-Multiple commands connected by pipes all execute in the same container. If you need to capture intermediate results of a pipe, use the @tee@ command.
-
-@arv-run@ commands always run inside a Docker image. By default, this is "arvados/jobs". Use @arv --docker-image IMG@ to specify the image to use. Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
+You may use "run-command":run-command.html parameter substitution in the output file name to generate different filenames for each task:
-Use @arv-run --dry-run@ to print out the final Arvados pipeline generated by @arv-run@ without submitting it.
-
-By default, the pipeline will be submitted to your configured Arvado instance. Use @arv-run --local@ to run the command locally using "arv-crunch-job".
+<notextile>
+<pre>
+$ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> '$(task.uuid).txt'</span>
+[...]
+ 1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > qr1hi-ot0gb-hmmxf2zubfpmhfk.txt
+ 2 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq > qr1hi-ot0gb-iu2xgy4hkx4mmri.txt
+ 1 stderr run-command: completed with exit code 0 (success)
+ 1 stderr run-command: the following output files will be saved to keep:
+ 1 stderr run-command: 363 ./qr1hi-ot0gb-hmmxf2zubfpmhfk.txt
+ 1 stderr run-command: start writing output to keep
+ 1 stderr upload wrote 363 total 363
+ 2 stderr run-command: completed with exit code 0 (success)
+ 2 stderr run-command: the following output files will be saved to keep:
+ 2 stderr run-command: 121 ./qr1hi-ot0gb-iu2xgy4hkx4mmri.txt
+ 2 stderr run-command: start writing output to keep
+ 2 stderr upload wrote 121 total 121
+[...]
+</pre>
+</notextile>
-You may specify @--batch-size N@ after the @--@ but before listing any files to specify how many files to provide put on the command line for each task.
+h2. Pipes
-h1. Examples
+Multiple commands may be connected by pipes and execute in the same container:
-Run one @grep@ task per file, with each input files piped from stdin. Redirect the output to output.txt.
+<notextile>
+<pre>
+$ <span class="userinput">arv-run cat -- *.fastq \| grep -H -n ATTGGAGGAAAGATGAGTGAC \> output.txt</span>
+[...]
+ 1 stderr run-command: cat /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq | grep -H -n ATTGGAGGAAAGATGAGTGAC > output.txt
+ 2 stderr run-command: cat /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_2.fastq | grep -H -n ATTGGAGGAAAGATGAGTGAC > output.txt
+[...]
+</pre>
+</notextile>
+If you need to capture intermediate results of a pipe, use the @tee@ command.
-Run @cat | grep@ once per file. Redirect the output to output.txt.
+h2. Running a shell script
<notextile>
<pre>
-$ <span class="userinput">arv-run cat -- *.fastq \| grep -H -n ATTGGAGGAAAGATGAGTGAC \> output.txt</span>
+$ <span class="userinput">echo 'echo hello world' > hello.sh</span>
+$ <span class="userinput">arv-run /bin/sh hello.sh</span>
+Upload local files: "hello.sh"
+Uploaded to qr1hi-4zz18-23u3hxugbm71qmn
+Running pipeline qr1hi-d1hrv-slcnhq5czo764b1
+[...]
+ 0 stderr run-command: /bin/sh /keep/5d3a4131b7d8f233f2a917d8a5c3c2b2+52/hello.sh
+ 0 stderr hello world
+ 0 stderr run-command: completed with exit code 0 (success)
+[...]
</pre>
</notextile>
-Run @bwa@ for pairs of fastq files in "inputs" using the reference human_g1k_v37.fasta.
+h2. Additional options
+
+* @--docker-image IMG@ : By default, commands run inside a Docker container created from the latest "arvados/jobs" Docker image. Use this option to specify a different image to use. Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
+* @--dry-run@ : Print out the final Arvados pipeline generated by @arv-run@ without submitting it.
+* @--local@ : By default, the pipeline will be submitted to your configured Arvados instance. Use this option to run the command locally using @arv-run-pipeline-instance --run-jobs-here@.
+* @--ignore-rcode@ : Some commands use non-zero exit codes to indicate nonfatal conditions (e.g. @grep@ returns 1 when no match is found). Set this to indicate that commands that return non-zero return codes should not be considered failed.
+* @--no-wait@ : Do not wait and display logs after submitting command, just exit.
+
+h2. Putting it all together: bwa mem
<notextile>
<pre>
-<span class="userinput">arv-run --docker-image arvados/jobs-java-bwa-samtools bwa mem reference/human_g1k_v37.fasta -- --batch-size 2 inputs/*.fastq \> '$(task.uuid).sam'</span>
+$ <span class="userinput">cd ~/keep/by_id/d0136bc494c21f79fc1b6a390561e6cb+2778</span>
+$ <span class="userinput">arv-run --docker-image arvados/jobs-java-bwa-samtools --repository peter --script-version 3609-arv-run bwa mem ../3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta -- --batch-size 2 *.fastq.gz \> '$(task.uuid).sam'</span>
+ 0 stderr run-command: parallelizing on input0 with items [[u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_1.fastq.gz', u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_2.fastq.gz'], [u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_1.fastq.gz', u'/keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_2.fastq.gz']]
+[...]
+ 1 stderr run-command: bwa mem /keep/3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_1.fastq.gz /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.1_2.fastq.gz > qr1hi-ot0gb-a4bzzyqqz4ubair.sam
+ 2 stderr run-command: bwa mem /keep/3514b8e5da0e8d109946bc809b20a78a+5698/human_g1k_v37.fasta /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_1.fastq.gz /keep/d0136bc494c21f79fc1b6a390561e6cb+2778/HWI-ST1027_129_D0THKACXX.2_2.fastq.gz > qr1hi-ot0gb-14j9ncw0ymkxq0v.sam
</pre>
</notextile>
diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py
index 7b0cb35..475b2a9 100644
--- a/sdk/python/arvados/commands/run.py
+++ b/sdk/python/arvados/commands/run.py
@@ -16,11 +16,12 @@ logger = logging.getLogger('arvados.arv-run')
arvrun_parser = argparse.ArgumentParser()
arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
-arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-crunch-job")
+arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
arvrun_parser.add_argument('--docker-image', type=str, default="arvados/jobs", help="Docker image to use, default arvados/jobs")
-arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Set this to indicate commands that return non-zero return codes should not be considered failed.")
+arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
+arvrun_parser.add_argument('--project-uuid', type=str, help="Parent project of the pipeline")
arvrun_parser.add_argument('--git-dir', type=str, default="", help="Git repository passed to arv-crunch-job when using --local")
arvrun_parser.add_argument('--repository', type=str, default="arvados", help="repository field of component, default 'arvados'")
arvrun_parser.add_argument('--script-version', type=str, default="master", help="script_version field of component, default 'master'")
@@ -35,16 +36,37 @@ class UploadFile(ArvFile):
pass
def is_in_collection(root, branch):
- if root == "/":
+ try:
+ if root == "/":
+ return (None, None)
+ fn = os.path.join(root, ".arvados#collection")
+ if os.path.exists(fn):
+ with file(fn, 'r') as f:
+ c = json.load(f)
+ return (c["portable_data_hash"], branch)
+ else:
+ sp = os.path.split(root)
+ return is_in_collection(sp[0], os.path.join(sp[1], branch))
+ except:
return (None, None)
- fn = os.path.join(root, ".arvados#collection")
- if os.path.exists(fn):
- with file(fn, 'r') as f:
- c = json.load(f)
- return (c["portable_data_hash"], branch)
- else:
- sp = os.path.split(root)
- return is_in_collection(sp[0], os.path.join(sp[1], branch))
+
+def determine_project(root, current_user):
+ try:
+ if root == "/":
+ return current_user
+ fn = os.path.join(root, ".arvados#project")
+ if os.path.exists(fn):
+ with file(fn, 'r') as f:
+ c = json.load(f)
+ if 'writable_by' in c and current_user in c['writable_by']:
+ return c["uuid"]
+ else:
+ return current_user
+ else:
+ sp = os.path.split(root)
+ return determine_project(sp[0], current_user)
+ except:
+ return current_user
def statfile(prefix, fn):
absfn = os.path.abspath(fn)
@@ -91,25 +113,37 @@ def main(arguments=None):
logger.error("Can only specify a single stdout file (run-command substitutions are permitted)")
return
- patterns = [re.compile("(--[^=]+=)(.*)"),
- re.compile("(-[^=]+=)(.*)"),
+ if not args.dry_run:
+ api = arvados.api('v1')
+ if args.project_uuid:
+ project = args.project_uuid
+ else:
+ project = determine_project(os.getcwd(), api.users().current().execute()["uuid"])
+
+ patterns = [re.compile("([^=]+=)(.*)"),
re.compile("(-.)(.+)")]
- for command in slots[1:]:
- for i in xrange(0, len(command)):
- a = command[i]
- if a[0] == '-':
- # parameter starts with '-' so it might be a command line
- # parameter with a file name, do some pattern matching
+ for j, command in enumerate(slots[1:]):
+ for i, a in enumerate(command):
+ if j > 0 and i == 0:
+ # j == 0 is stdin, j > 0 is commands
+ # always skip program executable (i == 0) in commands
+ pass
+ elif a.startswith('\\'):
+ # if it starts with a \ then don't do any interpretation
+ command[i] = a[1:]
+ else:
+ # Do some pattern matching
matched = False
for p in patterns:
m = p.match(a)
if m:
command[i] = statfile(m.group(1), m.group(2))
+ matched = True
break
- else:
- # parameter might be a file, so test it
- command[i] = statfile('', a)
+ if not matched:
+ # parameter might be a file, so test it
+ command[i] = statfile('', a)
n = True
pathprefix = "/"
@@ -141,18 +175,33 @@ def main(arguments=None):
for c in files:
c.fn = c.fn[len(pathstep):]
+ orgdir = os.getcwd()
os.chdir(pathprefix)
+ print("Upload local files: \"%s\"" % '" "'.join([c.fn for c in files]))
+
if args.dry_run:
print("cd %s" % pathprefix)
- print("arv-put \"%s\"" % '" "'.join([c.fn for c in files]))
pdh = "$(input)"
else:
- pdh = put.main(["--portable-data-hash"]+[c.fn for c in files])
+ files = sorted(files, key=lambda x: x.fn)
+ collection = arvados.CollectionWriter(api, num_retries=3)
+ stream = None
+ for f in files:
+ sp = os.path.split(f.fn)
+ if sp[0] != stream:
+ stream = sp[0]
+ collection.start_new_stream(stream)
+ collection.write_file(f.fn, sp[1])
+ item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
+ pdh = item["portable_data_hash"]
+ print "Uploaded to %s" % item["uuid"]
for c in files:
c.fn = "$(file %s/%s)" % (pdh, c.fn)
+ os.chdir(orgdir)
+
for i in xrange(1, len(slots)):
slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
@@ -169,7 +218,7 @@ def main(arguments=None):
task_foreach = []
group_parser = argparse.ArgumentParser()
- group_parser.add_argument('--batch-size', type=int)
+ group_parser.add_argument('-b', '--batch-size', type=int)
group_parser.add_argument('args', nargs=argparse.REMAINDER)
for s in xrange(2, len(slots)):
@@ -178,7 +227,7 @@ def main(arguments=None):
inp = "input%i" % (s-2)
groupargs = group_parser.parse_args(slots[2][i+1:])
if groupargs.batch_size:
- component["script_parameters"][inp] = {"batch":groupargs.args, "size":groupargs.batch_size}
+ component["script_parameters"][inp] = {"value": {"batch":groupargs.args, "size":groupargs.batch_size}}
slots[s] = slots[s][0:i] + [{"foreach": inp, "command": "$(%s)" % inp}]
else:
component["script_parameters"][inp] = groupargs.args
@@ -203,7 +252,7 @@ def main(arguments=None):
component["script_parameters"]["task.ignore_rcode"] = args.ignore_rcode
pipeline = {
- "name": " | ".join([s[0] for s in slots[2:]]),
+ "name": "arv-run " + " | ".join([s[0] for s in slots[2:]]),
"description": "@" + " ".join(starting_args) + "@",
"components": {
"command": component
@@ -214,7 +263,7 @@ def main(arguments=None):
if args.dry_run:
print(json.dumps(pipeline, indent=4))
else:
- api = arvados.api('v1')
+ pipeline["owner_uuid"] = project
pi = api.pipeline_instances().create(body=pipeline).execute()
print "Running pipeline %s" % pi["uuid"]
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list