[ARVADOS] updated: 70111b9be8ef9b85832e4d2c41f99fe18dbf0a39

git at public.curoverse.com git at public.curoverse.com
Thu Nov 26 21:11:06 EST 2015

Summary of changes:
 apps/workbench/test/helpers/download_helper.rb |  4 ++-
 apps/workbench/test/integration_helper.rb      | 34 +++++++++++++++-----------
 2 files changed, 23 insertions(+), 15 deletions(-)

  discards  882b71e217a39d77e9c34b9fcba59d7fd85b51d4 (commit)
  discards  326f9744f60d14566aa38a89f1e3e2999de9ab7c (commit)
  discards  5cb661ba5347e7c78c09d49a29e78907afd45896 (commit)
  discards  86ac7a774a81b1e081dc68e44826ad0615e3c3ed (commit)
       via  70111b9be8ef9b85832e4d2c41f99fe18dbf0a39 (commit)
       via  e635de14f17680bdc9bd5b00db52c93639cfd8d7 (commit)
       via  40f063c4f6aa35c641650c80751392292e151acb (commit)
       via  6936d2cddf2874a7113159e9adcae8c8e67dc48f (commit)
       via  a1592e2c6d6e9768a1918f3c59b74d6d19033a55 (commit)
       via  8d7f3c9ae0648719781174946ea89c3505aabd22 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (882b71e217a39d77e9c34b9fcba59d7fd85b51d4)
            \
             N -- N -- N (70111b9be8ef9b85832e4d2c41f99fe18dbf0a39)

When this happens we assume that you have already received alert emails
for all of the O revisions, so this message reports only the revisions on
the N branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

commit 70111b9be8ef9b85832e4d2c41f99fe18dbf0a39
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 26 21:11:16 2015 -0500

    7751: Add convenience class for staging task output in $TASK_KEEPMOUNT_TMP.

diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
new file mode 100755
index 0000000..b177892
--- /dev/null
+++ b/crunch_scripts/test/task_output_dir
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+import arvados
+import arvados.crunch
+import hashlib
+import os
+out = arvados.crunch.TaskOutputDir()
+string = open(__file__).read()
+with open(os.path.join(out.path, 'example.out'), 'w') as f:
+    f.write(string)
+with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
+    f.write(hashlib.sha1(string).hexdigest() + "\n")
diff --git a/sdk/python/arvados/crunch.py b/sdk/python/arvados/crunch.py
new file mode 100644
index 0000000..c184e6a
--- /dev/null
+++ b/sdk/python/arvados/crunch.py
@@ -0,0 +1,27 @@
+import json
+import os
+class TaskOutputDir(object):
+    """Keep-backed directory for staging outputs of Crunch tasks.
+    Example, in a crunch task whose output is a file called "out.txt"
+    containing "42":
+        import arvados
+        import arvados.crunch
+        import os
+        out = arvados.crunch.TaskOutputDir()
+        with open(os.path.join(out.path, 'out.txt'), 'w') as f:
+            f.write('42')
+        arvados.current_task().set_output(out.manifest_text())
+    """
+    def __init__(self):
+        self.path = os.environ['TASK_KEEPMOUNT_TMP']
+    def __str__(self):
+        return self.path
+    def manifest_text(self):
+        snapshot = os.path.join(self.path, '.arvados#collection')
+        return json.load(open(snapshot))['manifest_text']
diff --git a/sdk/python/tests/test_crunch.py b/sdk/python/tests/test_crunch.py
new file mode 100644
index 0000000..431390b
--- /dev/null
+++ b/sdk/python/tests/test_crunch.py
@@ -0,0 +1,27 @@
+import arvados.crunch
+import os
+import shutil
+import tempfile
+import unittest
+class TaskOutputDirTest(unittest.TestCase):
+    def setUp(self):
+        self.tmp = tempfile.mkdtemp()
+        os.environ['TASK_KEEPMOUNT_TMP'] = self.tmp
+    def tearDown(self):
+        os.environ.pop('TASK_KEEPMOUNT_TMP')
+        shutil.rmtree(self.tmp)
+    def test_env_var(self):
+        out = arvados.crunch.TaskOutputDir()
+        self.assertEqual(out.path, self.tmp)
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write('{\n  "manifest_text":"",\n  "uuid":null\n}\n')
+        self.assertEqual(out.manifest_text(), '')
+        # Special file must be re-read on each call to manifest_text().
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write(r'{"manifest_text":". unparsed 0:3:foo\n","uuid":null}')
+        self.assertEqual(out.manifest_text(), ". unparsed 0:3:foo\n")

commit e635de14f17680bdc9bd5b00db52c93639cfd8d7
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 26 20:52:10 2015 -0500

    7751: Set up an arv-mount scratch directory for each task, and put its path in TASK_KEEPMOUNT_TMP.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 5177434..53695e4 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -870,11 +870,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
     $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
-    $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
     $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
     $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
+    my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
     $ENV{"GZIP"} = "-n";
     my @srunargs = (
@@ -887,16 +888,20 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my $stdbuf = " stdbuf --output=0 --error=0 ";
     my $command =
-	"if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
-	."&& cd $ENV{CRUNCH_TMP} "
+	"if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
+        ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
+	."&& cd \Q$ENV{CRUNCH_TMP}\E "
         # These environment variables get used explicitly later in
         # $command.  No tool is expected to read these values directly.
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
-    $command .= "&& exec arv-mount --by-pdh --crunchstat-interval=10 --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
+    $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other \Q$keep_mnt\E --exec ";
+    $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
+    $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
     if ($docker_hash)
       my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
@@ -917,14 +922,18 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
       $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
-      # Currently, we make arv-mount's mount point appear at /keep
-      # inside the container (instead of using the same path as the
-      # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However,
-      # crunch scripts and utilities must not rely on this. They must
-      # use $TASK_KEEPMOUNT.
+      # Currently, we make the "by_pdh" directory in arv-mount's mount
+      # point appear at /keep inside the container (instead of using
+      # the same path as the host like we do with CRUNCH_SRC and
+      # CRUNCH_INSTALL). However, crunch scripts and utilities must
+      # not rely on this. They must use $TASK_KEEPMOUNT.
       $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
       $ENV{TASK_KEEPMOUNT} = "/keep";
+      # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
+      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp:ro\E ";
+      $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
       # TASK_WORK is almost exactly like a docker data volume: it
       # starts out empty, is writable, and persists until no
       # containers use it any more. We don't use --volumes-from to



More information about the arvados-commits mailing list