[ARVADOS] updated: 031db3fcd2bbcc2d3a31345c39d7a57bf960ad69

git at public.curoverse.com git at public.curoverse.com
Sun Jul 13 14:11:58 EDT 2014


Summary of changes:
 docker/api/Dockerfile             |  17 +++++-
 docker/api/apt.arvados.org.list   |   2 +
 docker/api/crunch-dispatch-run.sh |  20 +++++++
 docker/api/munge.key              | Bin 0 -> 1024 bytes
 docker/api/slurm.conf.in          |  60 ++++++++++++++++++++
 docker/api/supervisor.conf        |  12 ++++
 docker/arvdock                    | 113 +++++++++++++++++++++++++++++++-------
 docker/build_tools/Makefile       |  32 ++++++++++-
 docker/compute/Dockerfile         |  15 +++++
 docker/{api => compute}/ssh.sh    |   0
 docker/compute/supervisor.conf    |  14 +++++
 docker/slurm/Dockerfile           |  11 ++++
 docker/slurm/munge.key            | Bin 0 -> 1024 bytes
 docker/slurm/slurm.conf.in        |  60 ++++++++++++++++++++
 docker/slurm/supervisor.conf      |  14 +++++
 15 files changed, 346 insertions(+), 24 deletions(-)
 create mode 100644 docker/api/apt.arvados.org.list
 create mode 100755 docker/api/crunch-dispatch-run.sh
 create mode 100644 docker/api/munge.key
 create mode 100644 docker/api/slurm.conf.in
 create mode 100644 docker/compute/Dockerfile
 copy docker/{api => compute}/ssh.sh (100%)
 create mode 100644 docker/compute/supervisor.conf
 create mode 100644 docker/slurm/Dockerfile
 create mode 100644 docker/slurm/munge.key
 create mode 100644 docker/slurm/slurm.conf.in
 create mode 100644 docker/slurm/supervisor.conf

       via  031db3fcd2bbcc2d3a31345c39d7a57bf960ad69 (commit)
       via  652aec8c581bb6c38bae2c98f0611b142f855e4b (commit)
      from  b93a012bf18d1327cdba3d50eb35332253a1a8de (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit 031db3fcd2bbcc2d3a31345c39d7a57bf960ad69
Author: Ward Vandewege <ward at curoverse.com>
Date:   Sun Jul 13 11:04:44 2014 -0400

    Two compute nodes work at the slurm level. Crunch-dispatch runs.
    
    refs #3219

diff --git a/docker/api/slurm.conf.in b/docker/api/slurm.conf.in
index 6957e6f..7312a0e 100644
--- a/docker/api/slurm.conf.in
+++ b/docker/api/slurm.conf.in
@@ -53,8 +53,8 @@ PartitionName=DEFAULT MaxTime=INFINITE State=UP
 PartitionName=compute Default=YES Shared=yes
 #PartitionName=sysadmin Hidden=YES Shared=yes
 
-NodeName=compute[0-63]
+NodeName=compute[0-1]
 #NodeName=compute0 RealMemory=6967 Weight=6967
 
-PartitionName=compute Nodes=compute[0-63]
-PartitionName=crypto Nodes=compute[0-63]
+PartitionName=compute Nodes=compute[0-1]
+PartitionName=crypto Nodes=compute[0-1]
diff --git a/docker/arvdock b/docker/arvdock
index 544c641..abc3e8d 100755
--- a/docker/arvdock
+++ b/docker/arvdock
@@ -19,9 +19,10 @@ function usage {
     echo >&2 "  -s[port], --sso[=port]        SSO server (default port 9901)"
     echo >&2 "  -a[port], --api[=port]        API server (default port 9900)"
     echo >&2 "  -c[count], --compute[=count]  Compute nodes (default starts 2)"
-    echo >&2 "  -k, --keep                     Keep servers"
-    echo >&2 "  --ssh                          Enable SSH access to server containers"
-    echo >&2 "  -h, --help                     Display this help and exit"
+    echo >&2 "  -n, --nameserver              Nameserver"
+    echo >&2 "  -k, --keep                    Keep servers"
+    echo >&2 "  --ssh                         Enable SSH access to server containers"
+    echo >&2 "  -h, --help                    Display this help and exit"
     echo >&2
     echo >&2 "  If no options are given, the action is applied to all servers."
     echo >&2
@@ -43,10 +44,10 @@ function start_container {
     if [[ "$2" != '' ]]; then
       local name="$2"
       if [[ "$name" == "api_server" ]]; then
-        args="$args --hostname api --name $name"
+        args="$args --hostname api -P --name $name"
       elif [[ "$name" == "compute" ]]; then
         name=$name$COMPUTE_COUNTER
-        args="$args --hostname compute$COMPUTE_COUNTER --name $name"
+        args="$args --hostname compute$COMPUTE_COUNTER -P --name $name"
         let COMPUTE_COUNTER=$(($COMPUTE_COUNTER + 1))
       else
         args="$args --name $name"
@@ -77,8 +78,9 @@ function start_container {
     $DOCKER rm "$name" 2>/dev/null
 
     echo "Starting container:"
+    #echo "  $DOCKER run --dns=127.0.0.1 $args $image"
     echo "  $DOCKER run $args $image"
-    container=`$DOCKER run $args $image`
+    container=`$DOCKER run --dns=172.17.42.1 --dns-search=compute.dev.arvados $args $image`
     if [[ "$?" != "0" ]]; then
       echo "Unable to start container"
       exit 1
@@ -143,11 +145,12 @@ function do_start {
     local start_api=false
     local start_compute=false
     local start_workbench=false
+    local start_nameserver=false
     local start_keep=false
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o d::s::a::c::w::kh \
-                  --long doc::,sso::,api::,compute::,workbench::,keep,help,ssh \
+    local TEMP=`getopt -o d::s::a::c::w::nkh \
+                  --long doc::,sso::,api::,compute::,workbench::,nameserver,keep,help,ssh \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -188,6 +191,10 @@ function do_start {
                     *)  start_workbench=$2; shift 2 ;;
                 esac
                 ;;
+            -n | --nameserver)
+                start_nameserver=true
+                shift
+                ;;
             -k | --keep)
                 start_keep=true
                 shift
@@ -214,6 +221,7 @@ function do_start {
           $start_api == false &&
           $start_compute == false &&
           $start_workbench == false &&
+          $start_nameserver == false &&
           $start_keep == false ]]
     then
         start_doc=9898
@@ -221,9 +229,38 @@ function do_start {
         start_api=9900
         start_compute=2
         start_workbench=9899
+        start_nameserver=true
         start_keep=true
     fi
 
+    if [[ $start_nameserver != false ]]
+    then
+      # We rely on skydock and skydns for dns discovery between the slurm controller and compute nodes,
+      # so make sure they are running
+      $DOCKER images | grep skydns >/dev/null
+      if [[ "$?" != "0" ]]; then
+        echo "Downloading crosbymichael/skydns..."
+        $DOCKER pull crosbymichael/skydns
+      fi
+      $DOCKER ps | grep skydns >/dev/null
+      if [[ "$?" != "0" ]]; then
+        echo "Starting crosbymichael/skydns container..."
+        $DOCKER rm "skydns" 2>/dev/null
+        $DOCKER run -d -p 172.17.42.1:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -nameserver 8.8.4.4 -domain arvados
+      fi
+      $DOCKER images | grep skydock >/dev/null
+      if [[ "$?" != "0" ]]; then
+        echo "Downloading crosbymichael/skydock..."
+        $DOCKER pull crosbymichael/skydock
+      fi
+      $DOCKER ps | grep skydock >/dev/null
+      if [[ "$?" != "0" ]]; then
+        echo "Starting crosbymichael/skydock container..."
+        $DOCKER rm "skydock" 2>/dev/null
+        $DOCKER run -d -v /var/run/docker.sock:/docker.sock --name skydock crosbymichael/skydock -ttl 30 -environment dev -s /docker.sock -domain arvados -name skydns
+      fi
+    fi
+
     if [[ $start_sso != false ]]
     then
         start_container "$start_sso:443" "sso_server" '' '' "arvados/sso"
@@ -287,11 +324,12 @@ function do_stop {
     local stop_api=""
     local stop_compute=""
     local stop_workbench=""
+    local stop_nameserver=""
     local stop_keep=""
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o dsacwkh \
-                  --long doc,sso,api,compute,workbench,keep,help \
+    local TEMP=`getopt -o dsacwnkh \
+                  --long doc,sso,api,compute,workbench,nameserver,keep,help \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -312,6 +350,8 @@ function do_stop {
                 stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
             -w | --workbench)
                 stop_workbench=workbench_server ; shift ;;
+            -n | --nameserver )
+                stop_keep="skydock skydns" ; shift ;;
             -k | --keep )
                 stop_keep="keep_server_0 keep_server_1" ; shift ;;
             --)
@@ -331,6 +371,7 @@ function do_stop {
           $stop_api == "" &&
           $stop_compute == "" &&
           $stop_workbench == "" &&
+          $stop_nameserver == "" &&
           $stop_keep == "" ]]
     then
         stop_doc=doc_server
@@ -338,10 +379,11 @@ function do_stop {
         stop_api=api_server
         stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '`
         stop_workbench=workbench_server
+        stop_nameserver="skydock skydns"
         stop_keep="keep_server_0 keep_server_1"
     fi
 
-    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_keep \
+    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_nameserver $stop_keep \
         2>/dev/null
 }
 
diff --git a/docker/slurm/slurm.conf.in b/docker/slurm/slurm.conf.in
index 6957e6f..7312a0e 100644
--- a/docker/slurm/slurm.conf.in
+++ b/docker/slurm/slurm.conf.in
@@ -53,8 +53,8 @@ PartitionName=DEFAULT MaxTime=INFINITE State=UP
 PartitionName=compute Default=YES Shared=yes
 #PartitionName=sysadmin Hidden=YES Shared=yes
 
-NodeName=compute[0-63]
+NodeName=compute[0-1]
 #NodeName=compute0 RealMemory=6967 Weight=6967
 
-PartitionName=compute Nodes=compute[0-63]
-PartitionName=crypto Nodes=compute[0-63]
+PartitionName=compute Nodes=compute[0-1]
+PartitionName=crypto Nodes=compute[0-1]

commit 652aec8c581bb6c38bae2c98f0611b142f855e4b
Author: Ward Vandewege <ward at curoverse.com>
Date:   Thu Jul 10 16:34:12 2014 -0400

    First set of changes to add crunch-dispatch and compute nodes. This is not complete yet.
    
    refs #3219

diff --git a/docker/api/Dockerfile b/docker/api/Dockerfile
index 99a0b4c..8f69b80 100644
--- a/docker/api/Dockerfile
+++ b/docker/api/Dockerfile
@@ -9,10 +9,24 @@ MAINTAINER Tim Pierce <twp at curoverse.com>
 # check a git repo for crunch scripts.
 #
 RUN apt-get update && \
-    apt-get -q -y install procps postgresql postgresql-server-dev-9.1 apache2 \
+    apt-get -q -y install procps postgresql postgresql-server-dev-9.1 apache2 slurm-llnl munge \
                           supervisor && \
     git clone --bare git://github.com/curoverse/arvados.git /var/cache/git/arvados.git
 
+# For crunch-dispatch
+#ADD apt.arvados.org.list /etc/apt/sources.list.d/
+
+#RUN apt-key adv --keyserver pgp.mit.edu --recv 1078ECD7 && apt-get update && \
+#RUN apt-get -q -y install libjson-perl libwww-perl libio-socket-ssl-perl libipc-system-simple-perl slurm-llnl munge
+#    apt-get -q -y install arvados-src libjson-perl libwww-perl libio-socket-ssl-perl libipc-system-simple-perl slurm-llnl munge
+
+ADD munge.key /etc/munge/
+RUN chown munge:munge /etc/munge/munge.key
+ADD generated/slurm.conf /etc/slurm-llnl/
+
+RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
+# /for crunch-dispatch
+
 RUN /bin/mkdir -p /usr/src/arvados/services
 ADD generated/api.tar.gz /usr/src/arvados/services/
 
@@ -50,6 +64,7 @@ RUN a2dissite default && \
 # Supervisor.
 ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
 ADD ssh.sh /usr/local/bin/ssh.sh
+ADD crunch-dispatch-run.sh /usr/local/bin/crunch-dispatch-run.sh
 ADD apache2_foreground.sh /etc/apache2/foreground.sh
 
 # Start the supervisor.
diff --git a/docker/api/apt.arvados.org.list b/docker/api/apt.arvados.org.list
new file mode 100644
index 0000000..7eb8716
--- /dev/null
+++ b/docker/api/apt.arvados.org.list
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ wheezy main
diff --git a/docker/api/crunch-dispatch-run.sh b/docker/api/crunch-dispatch-run.sh
new file mode 100755
index 0000000..c16a433
--- /dev/null
+++ b/docker/api/crunch-dispatch-run.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+set -e
+export PATH="$PATH":/usr/local/arvados/src/services/crunch
+export PERLLIB=/usr/local/arvados/src/sdk/perl/lib
+export ARVADOS_API_HOST=qr1hi.arvadosapi.com
+export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
+
+if [[ ! -e $CRUNCH_DISPATCH_LOCKFILE ]]; then
+  touch $CRUNCH_DISPATCH_LOCKFILE
+fi
+
+export CRUNCH_JOB_BIN=/usr/local/arvados/src/services/crunch/crunch-job
+export HOME=`pwd`
+fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=production
+/usr/local/rvm/bin/rvm-exec default bundle install
+exec /usr/local/rvm/bin/rvm-exec default bundle exec ./script/crunch-dispatch.rb 2>&1
+
diff --git a/docker/api/munge.key b/docker/api/munge.key
new file mode 100644
index 0000000..34036a0
Binary files /dev/null and b/docker/api/munge.key differ
diff --git a/docker/api/slurm.conf.in b/docker/api/slurm.conf.in
new file mode 100644
index 0000000..6957e6f
--- /dev/null
+++ b/docker/api/slurm.conf.in
@@ -0,0 +1,60 @@
+
+ControlMachine=api
+#SlurmUser=slurmd
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=/etc/slurm-llnl/slurm-key.pem
+#JobCredentialPublicCertificate=/etc/slurm-llnl/slurm-cert.pem
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerType=sched/builtin
+SchedulerPort=7321
+#SchedulerRootFilter=
+#SelectType=select/linear
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+FastSchedule=1
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#JobAcctLogfile=
+#JobAcctFrequency=
+#
+# COMPUTE NODES
+NodeName=DEFAULT
+# CPUs=8 State=UNKNOWN RealMemory=6967 Weight=6967
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+PartitionName=compute Default=YES Shared=yes
+#PartitionName=sysadmin Hidden=YES Shared=yes
+
+NodeName=compute[0-63]
+#NodeName=compute0 RealMemory=6967 Weight=6967
+
+PartitionName=compute Nodes=compute[0-63]
+PartitionName=crypto Nodes=compute[0-63]
diff --git a/docker/api/supervisor.conf b/docker/api/supervisor.conf
index a4f9129..9c4a6a5 100644
--- a/docker/api/supervisor.conf
+++ b/docker/api/supervisor.conf
@@ -10,3 +10,15 @@ command=/usr/lib/postgresql/9.1/bin/postgres -D /var/lib/postgresql/9.1/main -c
 [program:apache2]
 command=/etc/apache2/foreground.sh
 stopsignal=6
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+[program:crunch-dispatch]
+user=root
+command=/usr/local/bin/crunch-dispatch-run.sh
diff --git a/docker/arvdock b/docker/arvdock
index f2edc19..544c641 100755
--- a/docker/arvdock
+++ b/docker/arvdock
@@ -7,15 +7,18 @@ if [[ "$DOCKER" == "" ]]; then
     DOCKER=`which docker`
 fi
 
+COMPUTE_COUNTER=0
+
 function usage {
     echo >&2
     echo >&2 "usage: $0 (start|stop|restart|test) [options]"
     echo >&2
     echo >&2 "$0 start/stop/restart options:"
-    echo >&2 "  -d [port], --doc[=port]        Documentation server (default port 9898)"
-    echo >&2 "  -w [port], --workbench[=port]  Workbench server (default port 9899)"
-    echo >&2 "  -s [port], --sso[=port]        SSO server (default port 9901)"
-    echo >&2 "  -a [port], --api[=port]        API server (default port 9900)"
+    echo >&2 "  -d[port], --doc[=port]        Documentation server (default port 9898)"
+    echo >&2 "  -w[port], --workbench[=port]  Workbench server (default port 9899)"
+    echo >&2 "  -s[port], --sso[=port]        SSO server (default port 9901)"
+    echo >&2 "  -a[port], --api[=port]        API server (default port 9900)"
+    echo >&2 "  -c[count], --compute[=count]  Compute nodes (default starts 2)"
     echo >&2 "  -k, --keep                     Keep servers"
     echo >&2 "  --ssh                          Enable SSH access to server containers"
     echo >&2 "  -h, --help                     Display this help and exit"
@@ -39,7 +42,15 @@ function start_container {
     fi
     if [[ "$2" != '' ]]; then
       local name="$2"
-      args="$args --name $name"
+      if [[ "$name" == "api_server" ]]; then
+        args="$args --hostname api --name $name"
+      elif [[ "$name" == "compute" ]]; then
+        name=$name$COMPUTE_COUNTER
+        args="$args --hostname compute$COMPUTE_COUNTER --name $name"
+        let COMPUTE_COUNTER=$(($COMPUTE_COUNTER + 1))
+      else
+        args="$args --name $name"
+      fi
     fi
     if [[ "$3" != '' ]]; then
       local volume="$3"
@@ -130,12 +141,13 @@ function do_start {
     local start_doc=false
     local start_sso=false
     local start_api=false
+    local start_compute=false
     local start_workbench=false
     local start_keep=false
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o d::s::a::w::kh \
-                  --long doc::,sso::,api::,workbench::,keep,help,ssh \
+    local TEMP=`getopt -o d::s::a::c::w::kh \
+                  --long doc::,sso::,api::,compute::,workbench::,keep,help,ssh \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -164,6 +176,12 @@ function do_start {
                     *)  start_api=$2; shift 2 ;;
                 esac
                 ;;
+            -c | --compute)
+                case "$2" in
+                    "") start_compute=2; shift 2 ;;
+                    *)  start_compute=$2; shift 2 ;;
+                esac
+                ;;
             -w | --workbench)
                 case "$2" in
                     "") start_workbench=9899; shift 2 ;;
@@ -194,12 +212,14 @@ function do_start {
     if [[ $start_doc == false &&
           $start_sso == false &&
           $start_api == false &&
+          $start_compute == false &&
           $start_workbench == false &&
           $start_keep == false ]]
     then
         start_doc=9898
         start_sso=9901
         start_api=9900
+        start_compute=2
         start_workbench=9899
         start_keep=true
     fi
@@ -214,6 +234,13 @@ function do_start {
         start_container "$start_api:443" "api_server" '' "sso_server:sso" "arvados/api"
     fi
 
+    if [[ $start_compute != false ]]
+    then
+        for i in `seq 0 $(($start_compute - 1))`; do
+          start_container "" "compute" '' "api_server:api" "arvados/compute"
+        done
+    fi
+
     if [[ $start_keep != false ]]
     then
         # create `keep_volumes' array with a list of keep mount points
@@ -258,12 +285,13 @@ function do_stop {
     local stop_doc=""
     local stop_sso=""
     local stop_api=""
+    local stop_compute=""
     local stop_workbench=""
     local stop_keep=""
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o d::s::a::w::kh \
-                  --long doc::,sso::,api::,workbench::,keep,help,ssh \
+    local TEMP=`getopt -o dsacwkh \
+                  --long doc,sso,api,compute,workbench,keep,help \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -275,18 +303,17 @@ function do_stop {
     do
         case $1 in
             -d | --doc)
-                stop_doc=doc_server ; shift 2 ;;
+                stop_doc=doc_server ; shift ;;
             -s | --sso)
-                stop_sso=sso_server ; shift 2 ;;
+                stop_sso=sso_server ; shift ;;
             -a | --api)
-                stop_api=api_server ; shift 2 ;;
+                stop_api=api_server ; shift ;;
+            -c | --compute)
+                stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
             -w | --workbench)
-                stop_workbench=workbench_server ; shift 2 ;;
+                stop_workbench=workbench_server ; shift ;;
             -k | --keep )
                 stop_keep="keep_server_0 keep_server_1" ; shift ;;
-            --ssh)
-                shift
-                ;;
             --)
                 shift
                 break
@@ -302,17 +329,19 @@ function do_stop {
     if [[ $stop_doc == "" &&
           $stop_sso == "" &&
           $stop_api == "" &&
+          $stop_compute == "" &&
           $stop_workbench == "" &&
           $stop_keep == "" ]]
     then
         stop_doc=doc_server
         stop_sso=sso_server
         stop_api=api_server
+        stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '`
         stop_workbench=workbench_server
         stop_keep="keep_server_0 keep_server_1"
     fi
 
-    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_workbench $stop_keep \
+    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_keep \
         2>/dev/null
 }
 
diff --git a/docker/build_tools/Makefile b/docker/build_tools/Makefile
index 267e244..e2fd50d 100644
--- a/docker/build_tools/Makefile
+++ b/docker/build_tools/Makefile
@@ -1,4 +1,4 @@
-all: api-image doc-image workbench-image keep-image sso-image
+all: api-image compute-image doc-image workbench-image keep-image sso-image
 
 # `make clean' removes the files generated in the build directory
 # but does not remove any docker images generated in previous builds
@@ -24,11 +24,15 @@ BUILD = build/.buildstamp
 
 BASE_DEPS = base/Dockerfile $(BASE_GENERATED)
 
+SLURM_DEPS = slurm/Dockerfile $(SLURM_GENERATED)
+
 JOBS_DEPS = jobs/Dockerfile
 
 JAVA_BWA_SAMTOOLS_DEPS = java-bwa-samtools/Dockerfile
 
-API_DEPS = api/Dockerfile $(API_GENERATED)
+API_DEPS = api/* $(API_GENERATED)
+
+COMPUTE_DEPS = compute/* $(COMPUTE_GENERATED)
 
 DOC_DEPS = doc/Dockerfile doc/apache2_vhost
 
@@ -43,12 +47,15 @@ BCBIO_NEXTGEN_DEPS = bcbio-nextgen/Dockerfile
 
 BASE_GENERATED = base/generated/arvados.tar.gz
 
+SLURM_GENERATED = slurm/generated/*
+
 API_GENERATED = \
         api/generated/apache2_vhost \
         api/generated/config_databases.sh \
         api/generated/database.yml \
         api/generated/omniauth.rb \
         api/generated/application.yml \
+        api/generated/slurm.conf \
         api/generated/superuser_token
 
 API_GENERATED_IN = \
@@ -57,8 +64,15 @@ API_GENERATED_IN = \
         api/database.yml.in \
         api/omniauth.rb.in \
         api/application.yml.in \
+        api/slurm.conf.in \
         api/superuser_token.in
 
+SLURM_GENERATED = \
+        slurm/generated/slurm.conf
+
+SLURM_GENERATED_IN = \
+        slurm/slurm.conf.in
+
 WORKBENCH_GENERATED = \
         workbench/generated/apache2_vhost \
         workbench/generated/application.yml
@@ -88,6 +102,10 @@ $(BUILD):
 	cd build/sdk/ruby && gem build arvados.gemspec
 	touch build/.buildstamp
 
+$(SLURM_GENERATED): config.yml $(BUILD)
+	$(CONFIG_RB)
+	mkdir -p slurm/generated
+
 $(BASE_GENERATED): config.yml $(BUILD)
 	$(CONFIG_RB)
 	mkdir -p base/generated
@@ -119,6 +137,12 @@ api-image: passenger-image $(BUILD) $(API_DEPS)
 	$(DOCKER_BUILD) -t arvados/api api
 	date >api-image
 
+slurm-image: base-image $(SLURM_DEPS)
+
+compute-image: slurm-image $(BUILD) $(COMPUTE_DEPS)
+	$(DOCKER_BUILD) -t arvados/compute compute
+	date >compute-image
+
 doc-image: base-image $(BUILD) $(DOC_DEPS)
 	mkdir -p doc/generated
 	tar -czf doc/generated/doc.tar.gz -C build doc
@@ -165,6 +189,10 @@ passenger-image: base-image
 	$(DOCKER_BUILD) -t arvados/passenger passenger
 	date >passenger-image
 
+slurm-image: base-image $(SLURM_DEPS)
+	$(DOCKER_BUILD) -t arvados/slurm slurm
+	date >slurm-image
+
 base-image: debian-image $(BASE_DEPS)
 	$(DOCKER_BUILD) -t arvados/base base
 	date >base-image
diff --git a/docker/compute/Dockerfile b/docker/compute/Dockerfile
new file mode 100644
index 0000000..8c403b5
--- /dev/null
+++ b/docker/compute/Dockerfile
@@ -0,0 +1,15 @@
+# Arvados compute node Docker container.
+
+FROM arvados/slurm
+MAINTAINER Ward Vandewege <ward at curoverse.com>
+
+RUN apt-get update && apt-get -q -y install supervisor
+
+RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
+
+# Supervisor.
+ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
+ADD ssh.sh /usr/local/bin/ssh.sh
+
+# Start the supervisor.
+CMD ["/usr/bin/supervisord", "-n"]
diff --git a/docker/compute/ssh.sh b/docker/compute/ssh.sh
new file mode 100755
index 0000000..664414b
--- /dev/null
+++ b/docker/compute/ssh.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+echo $ENABLE_SSH
+
+# Start ssh daemon if requested via the ENABLE_SSH env variable
+if [[ ! "$ENABLE_SSH" =~ (0|false|no|f|^$) ]]; then
+echo "STARTING"
+  /etc/init.d/ssh start
+fi
+
diff --git a/docker/compute/supervisor.conf b/docker/compute/supervisor.conf
new file mode 100644
index 0000000..6563b54
--- /dev/null
+++ b/docker/compute/supervisor.conf
@@ -0,0 +1,14 @@
+[program:ssh]
+user=root
+command=/usr/local/bin/ssh.sh
+startsecs=0
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+
diff --git a/docker/slurm/Dockerfile b/docker/slurm/Dockerfile
new file mode 100644
index 0000000..cfd63fc
--- /dev/null
+++ b/docker/slurm/Dockerfile
@@ -0,0 +1,11 @@
+# Slurm node Docker container.
+
+FROM arvados/base
+MAINTAINER Ward Vandewege <ward at curoverse.com>
+
+RUN apt-get update && apt-get -q -y install slurm-llnl munge
+
+ADD munge.key /etc/munge/
+RUN chown munge:munge /etc/munge/munge.key
+ADD generated/slurm.conf /etc/slurm-llnl/
+
diff --git a/docker/slurm/munge.key b/docker/slurm/munge.key
new file mode 100644
index 0000000..34036a0
Binary files /dev/null and b/docker/slurm/munge.key differ
diff --git a/docker/slurm/slurm.conf.in b/docker/slurm/slurm.conf.in
new file mode 100644
index 0000000..6957e6f
--- /dev/null
+++ b/docker/slurm/slurm.conf.in
@@ -0,0 +1,60 @@
+
+ControlMachine=api
+#SlurmUser=slurmd
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=/etc/slurm-llnl/slurm-key.pem
+#JobCredentialPublicCertificate=/etc/slurm-llnl/slurm-cert.pem
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerType=sched/builtin
+SchedulerPort=7321
+#SchedulerRootFilter=
+#SelectType=select/linear
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+FastSchedule=1
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#JobAcctLogfile=
+#JobAcctFrequency=
+#
+# COMPUTE NODES
+NodeName=DEFAULT
+# CPUs=8 State=UNKNOWN RealMemory=6967 Weight=6967
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+PartitionName=compute Default=YES Shared=yes
+#PartitionName=sysadmin Hidden=YES Shared=yes
+
+NodeName=compute[0-63]
+#NodeName=compute0 RealMemory=6967 Weight=6967
+
+PartitionName=compute Nodes=compute[0-63]
+PartitionName=crypto Nodes=compute[0-63]
diff --git a/docker/slurm/supervisor.conf b/docker/slurm/supervisor.conf
new file mode 100644
index 0000000..6563b54
--- /dev/null
+++ b/docker/slurm/supervisor.conf
@@ -0,0 +1,14 @@
+[program:ssh]
+user=root
+command=/usr/local/bin/ssh.sh
+startsecs=0
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list