[ARVADOS] updated: 9ce4db92fc33ecd4e98b1ff2a675969153faaf89

git at public.curoverse.com git at public.curoverse.com
Tue Dec 1 11:54:12 EST 2015


Summary of changes:
 sdk/cli/bin/crunch-job                         |  2 +-
 sdk/go/blockdigest/blockdigest.go              | 10 ----------
 sdk/go/blockdigest/blockdigest_test.go         | 24 +++++++++++++++++++-----
 sdk/go/logger/logger.go                        | 16 +++++++++-------
 sdk/go/manifest/manifest_test.go               | 24 ++++++++++++++++++------
 services/api/test/unit/crunch_dispatch_test.rb |  7 +++++--
 services/datamanager/datamanager.go            |  2 +-
 7 files changed, 53 insertions(+), 32 deletions(-)

  discards  70111b9be8ef9b85832e4d2c41f99fe18dbf0a39 (commit)
  discards  e635de14f17680bdc9bd5b00db52c93639cfd8d7 (commit)
       via  9ce4db92fc33ecd4e98b1ff2a675969153faaf89 (commit)
       via  40063c587395e310fe456e7239ae216266b368d3 (commit)
       via  128c2b5e228e1821384064ec50604a1463c29898 (commit)
       via  f04a6c74663185addff5564b47db0442679de78e (commit)
       via  efc17768290cc63cf7be7b2bccfc5caa77720846 (commit)
       via  d88316705b23d1022ee2167c7ed0e5cf5e460cbe (commit)
       via  a5ed26a2a1d01646ac511b925f56484be2e8819a (commit)
       via  040a541c74913c01ee3517273a7be30c510cc620 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not an ancestor of the new revision.  This
situation occurs when you force-push a change (git push --force) and
generate a repository containing something like this:

 * -- * -- B -- O -- O -- O (70111b9be8ef9b85832e4d2c41f99fe18dbf0a39)
            \
             N -- N -- N (9ce4db92fc33ecd4e98b1ff2a675969153faaf89)

When this happens we assume that you have already received alert emails
for all of the O revisions, so here we report only the revisions on the N
branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 9ce4db92fc33ecd4e98b1ff2a675969153faaf89
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 26 21:11:16 2015 -0500

    7751: Add convenience class for staging task output in $TASK_KEEPMOUNT_TMP.

diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
new file mode 100755
index 0000000..b177892
--- /dev/null
+++ b/crunch_scripts/test/task_output_dir
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import arvados
+import arvados.crunch
+import hashlib
+import os
+
+out = arvados.crunch.TaskOutputDir()
+
+string = open(__file__).read()
+with open(os.path.join(out.path, 'example.out'), 'w') as f:
+    f.write(string)
+with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
+    f.write(hashlib.sha1(string).hexdigest() + "\n")
+
+arvados.current_task().set_output(out.manifest_text())
diff --git a/sdk/python/arvados/crunch.py b/sdk/python/arvados/crunch.py
new file mode 100644
index 0000000..c184e6a
--- /dev/null
+++ b/sdk/python/arvados/crunch.py
@@ -0,0 +1,27 @@
+import json
+import os
+
+class TaskOutputDir(object):
+    """Keep-backed directory for staging outputs of Crunch tasks.
+
+    Example, in a crunch task whose output is a file called "out.txt"
+    containing "42":
+
+        import arvados
+        import arvados.crunch
+        import os
+
+        out = arvados.crunch.TaskOutputDir()
+        with open(os.path.join(out.path, 'out.txt'), 'w') as f:
+            f.write('42')
+        arvados.current_task().set_output(out.manifest_text())
+    """
+    def __init__(self):
+        self.path = os.environ['TASK_KEEPMOUNT_TMP']
+
+    def __str__(self):
+        return self.path
+
+    def manifest_text(self):
+        snapshot = os.path.join(self.path, '.arvados#collection')
+        return json.load(open(snapshot))['manifest_text']
diff --git a/sdk/python/tests/test_crunch.py b/sdk/python/tests/test_crunch.py
new file mode 100644
index 0000000..431390b
--- /dev/null
+++ b/sdk/python/tests/test_crunch.py
@@ -0,0 +1,27 @@
+import arvados.crunch
+import os
+import shutil
+import tempfile
+import unittest
+
+class TaskOutputDirTest(unittest.TestCase):
+    def setUp(self):
+        self.tmp = tempfile.mkdtemp()
+        os.environ['TASK_KEEPMOUNT_TMP'] = self.tmp
+
+    def tearDown(self):
+        os.environ.pop('TASK_KEEPMOUNT_TMP')
+        shutil.rmtree(self.tmp)
+
+    def test_env_var(self):
+        out = arvados.crunch.TaskOutputDir()
+        self.assertEqual(out.path, self.tmp)
+
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write('{\n  "manifest_text":"",\n  "uuid":null\n}\n')
+        self.assertEqual(out.manifest_text(), '')
+
+        # Special file must be re-read on each call to manifest_text().
+        with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+            f.write(r'{"manifest_text":". unparsed 0:3:foo\n","uuid":null}')
+        self.assertEqual(out.manifest_text(), ". unparsed 0:3:foo\n")

commit 40063c587395e310fe456e7239ae216266b368d3
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Nov 26 20:52:10 2015 -0500

    7751: Set up an arv-mount scratch directory for each task, and put its path in TASK_KEEPMOUNT_TMP.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 5177434..9de9b05 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -870,11 +870,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
     $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
-    $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
     $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
     $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
 
+    my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
+
     $ENV{"GZIP"} = "-n";
 
     my @srunargs = (
@@ -887,16 +888,20 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my $stdbuf = " stdbuf --output=0 --error=0 ";
 
     my $command =
-	"if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
-        ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
-	."&& cd $ENV{CRUNCH_TMP} "
+	"if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
+        ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
+	."&& cd \Q$ENV{CRUNCH_TMP}\E "
         # These environment variables get used explicitly later in
         # $command.  No tool is expected to read these values directly.
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
-    $command .= "&& exec arv-mount --by-pdh --crunchstat-interval=10 --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
+
+    $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other \Q$keep_mnt\E --exec ";
+    $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
+    $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
+
     if ($docker_hash)
     {
       my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
@@ -917,14 +922,18 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
       $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
 
-      # Currently, we make arv-mount's mount point appear at /keep
-      # inside the container (instead of using the same path as the
-      # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However,
-      # crunch scripts and utilities must not rely on this. They must
-      # use $TASK_KEEPMOUNT.
+      # Currently, we make the "by_pdh" directory in arv-mount's mount
+      # point appear at /keep inside the container (instead of using
+      # the same path as the host like we do with CRUNCH_SRC and
+      # CRUNCH_INSTALL). However, crunch scripts and utilities must
+      # not rely on this. They must use $TASK_KEEPMOUNT.
       $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
       $ENV{TASK_KEEPMOUNT} = "/keep";
 
+      # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
+      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp\E ";
+      $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
+
       # TASK_WORK is almost exactly like a docker data volume: it
       # starts out empty, is writable, and persists until no
       # containers use it any more. We don't use --volumes-from to

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list