[ARVADOS] updated: 6840236215a5106641111262d00126f9d2c98991

git at public.curoverse.com git at public.curoverse.com
Mon Oct 27 11:04:49 EDT 2014


Summary of changes:
 .../pipeline_instances/_running_component.html.erb |  2 +-
 docker/arvdock                                     | 11 +++++++----
 docker/compute/Dockerfile                          | 10 +++++-----
 docker/compute/supervisor.conf                     |  5 +++++
 sdk/cli/bin/crunch-job                             | 23 +++++++++++-----------
 5 files changed, 30 insertions(+), 21 deletions(-)

  discards  22c51cdf55e895daf25fd8f8bced113f0b2be45e (commit)
       via  6840236215a5106641111262d00126f9d2c98991 (commit)
       via  e74893571b3d9682a9377432e6c771bd3bdead11 (commit)
       via  3569bc7d5b5cbc10f26e42e40525325a92da236d (commit)
       via  66bf1aa373ea284f41bd9da5622ca5e3d3cd354c (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (22c51cdf55e895daf25fd8f8bced113f0b2be45e)
            \
             N -- N -- N (6840236215a5106641111262d00126f9d2c98991)

When this happens, we assume that you have already received alert emails
for all of the O revisions, so here we report only the revisions on the N
branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 6840236215a5106641111262d00126f9d2c98991
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 23 16:39:07 2014 -0400

    3824: Use keep volumes big enough to store the arvados/jobs docker image. Do not clobber settings.conf if not starting apiserver.

diff --git a/docker/arvdock b/docker/arvdock
index e36e5cf..3960566 100755
--- a/docker/arvdock
+++ b/docker/arvdock
@@ -136,8 +136,8 @@ function make_keep_volumes () {
     while [ ${#keep_volumes[*]} -lt 2 ]
     do
         new_keep=$(mktemp -d)
-        echo >&2 "mounting 512M tmpfs keep volume in $new_keep"
-        sudo mount -t tmpfs -o size=512M tmpfs $new_keep
+        echo >&2 "mounting 2G tmpfs keep volume in $new_keep"
+        sudo mount -t tmpfs -o size=2G tmpfs $new_keep
         mkdir $new_keep/keep
         keep_volumes+=($new_keep)
     done
@@ -311,13 +311,16 @@ function do_start {
         start_container "$start_workbench:80" "workbench_server" '' "api_server:api" "arvados/workbench"
     fi
 
-    if [ -d $HOME/.config/arvados ] || mkdir -p $HOME/.config/arvados
+    if [[ $start_api != false ]]
     then
-        cat >$HOME/.config/arvados/settings.conf <<EOF
+        if [ -d $HOME/.config/arvados ] || mkdir -p $HOME/.config/arvados
+        then
+            cat >$HOME/.config/arvados/settings.conf <<EOF
 ARVADOS_API_HOST=$(ip_address "api_server")
 ARVADOS_API_HOST_INSECURE=yes
 ARVADOS_API_TOKEN=$(cat api/generated/superuser_token)
 EOF
+        fi
     fi
 
 }

commit e74893571b3d9682a9377432e6c771bd3bdead11
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 23 16:38:04 2014 -0400

    3824: Do not say "There are  jobs in the queue ahead of this one" if queue_position is unknown.

diff --git a/apps/workbench/app/views/pipeline_instances/_running_component.html.erb b/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
index caa8377..85a1530 100644
--- a/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
+++ b/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
@@ -46,7 +46,7 @@
                   This job is next in the queue to run.
                 <% elsif current_job[:queue_position] == 1 %>
                   There is 1 job in the queue ahead of this one.
-                <% else %>
+                <% elsif current_job[:queue_position] %>
                   There are <%= current_job[:queue_position] %> jobs in the queue ahead of this one.
                 <% end %>
               <% rescue %>

commit 3569bc7d5b5cbc10f26e42e40525325a92da236d
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 23 16:35:05 2014 -0400

    3824: Add dnsmasq to compute images, to make docker-in-docker work. refs #3333

diff --git a/docker/compute/Dockerfile b/docker/compute/Dockerfile
index 2f6979c..792fd28 100644
--- a/docker/compute/Dockerfile
+++ b/docker/compute/Dockerfile
@@ -3,7 +3,7 @@
 FROM arvados/slurm
 MAINTAINER Ward Vandewege <ward at curoverse.com>
 
-RUN apt-get update && apt-get -qqy install supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron
+RUN apt-get update && apt-get -qqy install supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron dnsmasq
 
 ADD fuse.conf /etc/fuse.conf
 
diff --git a/docker/compute/supervisor.conf b/docker/compute/supervisor.conf
index f2cce3f..7fc34fc 100644
--- a/docker/compute/supervisor.conf
+++ b/docker/compute/supervisor.conf
@@ -27,3 +27,8 @@ startsecs=0
 user=root
 command=/usr/local/bin/wrapdocker.sh
 
+[program:dnsmasq]
+user=root
+command=/etc/init.d/dnsmasq start
+startsecs=0
+

commit 66bf1aa373ea284f41bd9da5622ca5e3d3cd354c
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 23 13:19:31 2014 -0400

    3824: Use a docker volume for TASK_WORK, clean up env var and tmp dir handling, improve logging.

diff --git a/docker/compute/Dockerfile b/docker/compute/Dockerfile
index 929c136..2f6979c 100644
--- a/docker/compute/Dockerfile
+++ b/docker/compute/Dockerfile
@@ -9,11 +9,11 @@ ADD fuse.conf /etc/fuse.conf
 
 RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
 
-# Install Docker from the Docker Inc. repository
+# Install Docker from the Arvados package repository
 RUN apt-get update -qq && apt-get install -qqy iptables ca-certificates lxc apt-transport-https
-RUN echo deb https://get.docker.io/ubuntu docker main > /etc/apt/sources.list.d/docker.list
-RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9
-RUN apt-get update -qq && apt-get install -qqy lxc-docker
+ADD apt.arvados.org.list /etc/apt/sources.list.d/apt.arvados.org.list
+RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
+RUN apt-get update -qq && apt-get install -qqy docker.io
 
 RUN addgroup --gid 4005 crunch && mkdir /home/crunch && useradd --uid 4005 --gid 4005 crunch && usermod crunch -G fuse,docker && chown crunch:crunch /home/crunch
 
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 9bad7ae..ffd3ec9 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -357,7 +357,7 @@ if (!defined $no_clear_tmp) {
   if ($cleanpid == 0)
   {
     srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_TMP/opt $CRUNCH_TMP/src*']);
+          ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep $CRUNCH_TMP/task/*.keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src*']);
     exit (1);
   }
   while (1)
@@ -547,8 +547,6 @@ else {
   my @execargs = ("sh", "-c",
                   "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
 
-  # Note: this section is almost certainly unnecessary if we're
-  # running tasks in docker containers.
   my $installpid = fork();
   if ($installpid == 0)
   {
@@ -694,7 +692,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     }
     $ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name};
     $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
-    $ENV{"TASK_WORK"} = $ENV{"JOB_WORK"}."/$id.$$";
+    $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
     $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
@@ -723,36 +721,53 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)
     {
-      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=10000 ";
-      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --user=crunch --cidfile=$ENV{TASK_WORK}/docker.cid ";
+      my $cidfile = "$ENV{CRUNCH_TMP}/$ENV{TASK_UUID}.cid";
+      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
+      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile ";
+
       # Dynamically configure the container to use the host system as its
       # DNS server.  Get the host's global addresses from the ip command,
       # and turn them into docker --dns options using gawk.
       $command .=
           q{$(ip -o address show scope global |
               gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
-      $command .= "--volume=\Q$ENV{CRUNCH_SRC}:/tmp/crunch-src:ro\E ";
+
+      # The source tree and $destdir directory (which we have
+      # installed on the worker host) are available in the container,
+      # under the same path.
+      $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
+      $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
+
+      # For some reason we make arv-mount's mount point appear at
+      # /keep inside the container, instead of using the same path as
+      # the host and expecting the task to pay attention to
+      # $TASK_KEEPMOUNT like we do with everything else.
       $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
-      $command .= "--env=\QHOME=/home/crunch\E ";
+      $ENV{TASK_KEEPMOUNT} = "/keep";
+
+      # TASK_WORK is a plain docker data volume: it starts out empty,
+      # is writable, and persists until no containers use it any
+      # more. We don't use --volumes-from to share it with other
+      # containers: it is only accessible to this task, and it goes
+      # away when this task stops.
+      $command .= "--volume=\Q$ENV{TASK_WORK}\E ";
+
+      # JOB_WORK is also a plain docker data volume for now. TODO:
+      # Share a single JOB_WORK volume across all task containers on a
+      # given worker node, and delete it when the job ends (and, in
+      # case that doesn't work, when the next job starts).
+      $command .= "--volume=\Q$ENV{JOB_WORK}\E ";
+
       while (my ($env_key, $env_val) = each %ENV)
       {
-        if ($env_key =~ /^(ARVADOS|JOB|TASK)_/) {
-          if ($env_key eq "TASK_WORK") {
-            $command .= "--env=\QTASK_WORK=/tmp/crunch-job\E ";
-          }
-          elsif ($env_key eq "TASK_KEEPMOUNT") {
-            $command .= "--env=\QTASK_KEEPMOUNT=/keep\E ";
-          }
-          else {
-            $command .= "--env=\Q$env_key=$env_val\E ";
-          }
+        if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
+          $command .= "--env=\Q$env_key=$env_val\E ";
         }
       }
-      $command .= "--env=\QCRUNCH_NODE_SLOTS=$ENV{CRUNCH_NODE_SLOTS}\E ";
-      $command .= "--env=\QCRUNCH_SRC=/tmp/crunch-src\E ";
+      $command .= "--env=\QHOME=$ENV{HOME}\E ";
       $command .= "\Q$docker_hash\E ";
       $command .= "stdbuf --output=0 --error=0 ";
-      $command .= "/tmp/crunch-src/crunch_scripts/" . $Job->{"script"};
+      $command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
     } else {
       # Non-docker run
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
@@ -763,8 +778,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     my @execargs = ('bash', '-c', $command);
     srun (\@srunargs, \@execargs, undef, $build_script_to_send);
     # exec() failed, we assume nothing happened.
-    Log(undef, "srun() failed on build script");
-    die;
+    die "srun() failed on build script\n";
   }
   close("writer");
   if (!defined $childpid)
@@ -1555,11 +1569,11 @@ sub srun
   my $opts = shift || {};
   my $stdin = shift;
   my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs;
-  print STDERR (join (" ",
-		      map { / / ? "'$_'" : $_ }
-		      (@$args)),
-		"\n")
-      if $ENV{CRUNCH_DEBUG};
+
+  my $show_cmd = "@{$args}";
+  $show_cmd =~ s/(TOKEN\\*=)\S+/${1}[...]/g;
+  $show_cmd =~ s/\n/ /g;
+  warn "starting: $show_cmd\n";
 
   if (defined $stdin) {
     my $child = open STDIN, "-|";
@@ -1692,7 +1706,7 @@ __DATA__
 # checkout-and-build
 
 use Fcntl ':flock';
-use File::Path qw( make_path );
+use File::Path qw( make_path remove_tree );
 
 my $destdir = $ENV{"CRUNCH_SRC"};
 my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
@@ -1700,12 +1714,18 @@ my $repo = $ENV{"CRUNCH_SRC_URL"};
 my $task_work = $ENV{"TASK_WORK"};
 
 for my $dir ($destdir, $task_work) {
-    if ($dir) {
-        make_path $dir;
-        -e $dir or die "Failed to create temporary directory ($dir): $!";
-    }
+  if ($dir) {
+    make_path $dir;
+    -e $dir or die "Failed to create temporary directory ($dir): $!";
+  }
 }
 
+if ($task_work) {
+  remove_tree($task_work, {keep_root => 1});
+  chmod 01777, $task_work;
+}
+
+
 open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
 flock L, LOCK_EX;
 if (readlink ("$destdir.commit") eq $commit && -d $destdir) {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list