[ARVADOS] updated: 031db3fcd2bbcc2d3a31345c39d7a57bf960ad69
git at public.curoverse.com
Sun Jul 13 14:11:58 EDT 2014
Summary of changes:
docker/api/Dockerfile | 17 +++++-
docker/api/apt.arvados.org.list | 2 +
docker/api/crunch-dispatch-run.sh | 20 +++++++
docker/api/munge.key | Bin 0 -> 1024 bytes
docker/api/slurm.conf.in | 60 ++++++++++++++++++++
docker/api/supervisor.conf | 12 ++++
docker/arvdock | 113 +++++++++++++++++++++++++++++++-------
docker/build_tools/Makefile | 32 ++++++++++-
docker/compute/Dockerfile | 15 +++++
docker/{api => compute}/ssh.sh | 0
docker/compute/supervisor.conf | 14 +++++
docker/slurm/Dockerfile | 11 ++++
docker/slurm/munge.key | Bin 0 -> 1024 bytes
docker/slurm/slurm.conf.in | 60 ++++++++++++++++++++
docker/slurm/supervisor.conf | 14 +++++
15 files changed, 346 insertions(+), 24 deletions(-)
create mode 100644 docker/api/apt.arvados.org.list
create mode 100755 docker/api/crunch-dispatch-run.sh
create mode 100644 docker/api/munge.key
create mode 100644 docker/api/slurm.conf.in
create mode 100644 docker/compute/Dockerfile
copy docker/{api => compute}/ssh.sh (100%)
create mode 100644 docker/compute/supervisor.conf
create mode 100644 docker/slurm/Dockerfile
create mode 100644 docker/slurm/munge.key
create mode 100644 docker/slurm/slurm.conf.in
create mode 100644 docker/slurm/supervisor.conf
via 031db3fcd2bbcc2d3a31345c39d7a57bf960ad69 (commit)
via 652aec8c581bb6c38bae2c98f0611b142f855e4b (commit)
from b93a012bf18d1327cdba3d50eb35332253a1a8de (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 031db3fcd2bbcc2d3a31345c39d7a57bf960ad69
Author: Ward Vandewege <ward at curoverse.com>
Date: Sun Jul 13 11:04:44 2014 -0400
Two compute nodes work at the slurm level. Crunch-dispatch runs.
refs #3219
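As a rough sketch of how the updated arvdock options below might be
exercised once the images are built (the working directory and the
default ports are assumptions; see the usage text in the diff):

    # Start the API server, the skydns/skydock nameserver containers
    # and two compute node containers, then stop the compute nodes.
    ./arvdock start --api --nameserver --compute=2
    ./arvdock stop -c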
diff --git a/docker/api/slurm.conf.in b/docker/api/slurm.conf.in
index 6957e6f..7312a0e 100644
--- a/docker/api/slurm.conf.in
+++ b/docker/api/slurm.conf.in
@@ -53,8 +53,8 @@ PartitionName=DEFAULT MaxTime=INFINITE State=UP
PartitionName=compute Default=YES Shared=yes
#PartitionName=sysadmin Hidden=YES Shared=yes
-NodeName=compute[0-63]
+NodeName=compute[0-1]
#NodeName=compute0 RealMemory=6967 Weight=6967
-PartitionName=compute Nodes=compute[0-63]
-PartitionName=crypto Nodes=compute[0-63]
+PartitionName=compute Nodes=compute[0-1]
+PartitionName=crypto Nodes=compute[0-1]
diff --git a/docker/arvdock b/docker/arvdock
index 544c641..abc3e8d 100755
--- a/docker/arvdock
+++ b/docker/arvdock
@@ -19,9 +19,10 @@ function usage {
echo >&2 " -s[port], --sso[=port] SSO server (default port 9901)"
echo >&2 " -a[port], --api[=port] API server (default port 9900)"
echo >&2 " -c[count], --compute[=count] Compute nodes (default starts 2)"
- echo >&2 " -k, --keep Keep servers"
- echo >&2 " --ssh Enable SSH access to server containers"
- echo >&2 " -h, --help Display this help and exit"
+ echo >&2 " -n, --nameserver Nameserver"
+ echo >&2 " -k, --keep Keep servers"
+ echo >&2 " --ssh Enable SSH access to server containers"
+ echo >&2 " -h, --help Display this help and exit"
echo >&2
echo >&2 " If no options are given, the action is applied to all servers."
echo >&2
@@ -43,10 +44,10 @@ function start_container {
if [[ "$2" != '' ]]; then
local name="$2"
if [[ "$name" == "api_server" ]]; then
- args="$args --hostname api --name $name"
+ args="$args --hostname api -P --name $name"
elif [[ "$name" == "compute" ]]; then
name=$name$COMPUTE_COUNTER
- args="$args --hostname compute$COMPUTE_COUNTER --name $name"
+ args="$args --hostname compute$COMPUTE_COUNTER -P --name $name"
let COMPUTE_COUNTER=$(($COMPUTE_COUNTER + 1))
else
args="$args --name $name"
@@ -77,8 +78,9 @@ function start_container {
$DOCKER rm "$name" 2>/dev/null
echo "Starting container:"
+ #echo " $DOCKER run --dns=127.0.0.1 $args $image"
echo " $DOCKER run $args $image"
- container=`$DOCKER run $args $image`
+ container=`$DOCKER run --dns=172.17.42.1 --dns-search=compute.dev.arvados $args $image`
if [[ "$?" != "0" ]]; then
echo "Unable to start container"
exit 1
@@ -143,11 +145,12 @@ function do_start {
local start_api=false
local start_compute=false
local start_workbench=false
+ local start_nameserver=false
local start_keep=false
# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
- local TEMP=`getopt -o d::s::a::c::w::kh \
- --long doc::,sso::,api::,compute::,workbench::,keep,help,ssh \
+ local TEMP=`getopt -o d::s::a::c::w::nkh \
+ --long doc::,sso::,api::,compute::,workbench::,nameserver,keep,help,ssh \
-n "$0" -- "$@"`
if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -188,6 +191,10 @@ function do_start {
*) start_workbench=$2; shift 2 ;;
esac
;;
+ -n | --nameserver)
+ start_nameserver=true
+ shift
+ ;;
-k | --keep)
start_keep=true
shift
@@ -214,6 +221,7 @@ function do_start {
$start_api == false &&
$start_compute == false &&
$start_workbench == false &&
+ $start_nameserver == false &&
$start_keep == false ]]
then
start_doc=9898
@@ -221,9 +229,38 @@ function do_start {
start_api=9900
start_compute=2
start_workbench=9899
+ start_nameserver=true
start_keep=true
fi
+ if [[ $start_nameserver != false ]]
+ then
+ # We rely on skydock and skydns for dns discovery between the slurm controller and compute nodes,
+ # so make sure they are running
+ $DOCKER images | grep skydns >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo "Downloading crosbymichael/skydns..."
+ $DOCKER pull crosbymichael/skydns
+ fi
+ $DOCKER ps | grep skydns >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo "Starting crosbymichael/skydns container..."
+ $DOCKER rm "skydns" 2>/dev/null
+ $DOCKER run -d -p 172.17.42.1:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -nameserver 8.8.4.4 -domain arvados
+ fi
+ $DOCKER images | grep skydock >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo "Downloading crosbymichael/skydock..."
+ $DOCKER pull crosbymichael/skydock
+ fi
+ $DOCKER ps | grep skydock >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo "Starting crosbymichael/skydock container..."
+ $DOCKER rm "skydock" 2>/dev/null
+ $DOCKER run -d -v /var/run/docker.sock:/docker.sock --name skydock crosbymichael/skydock -ttl 30 -environment dev -s /docker.sock -domain arvados -name skydns
+ fi
+ fi
+
if [[ $start_sso != false ]]
then
start_container "$start_sso:443" "sso_server" '' '' "arvados/sso"
@@ -287,11 +324,12 @@ function do_stop {
local stop_api=""
local stop_compute=""
local stop_workbench=""
+ local stop_nameserver=""
local stop_keep=""
# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
- local TEMP=`getopt -o dsacwkh \
- --long doc,sso,api,compute,workbench,keep,help \
+ local TEMP=`getopt -o dsacwnkh \
+ --long doc,sso,api,compute,workbench,nameserver,keep,help \
-n "$0" -- "$@"`
if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -312,6 +350,8 @@ function do_stop {
stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
-w | --workbench)
stop_workbench=workbench_server ; shift ;;
+ -n | --nameserver )
+ stop_nameserver="skydock skydns" ; shift ;;
-k | --keep )
stop_keep="keep_server_0 keep_server_1" ; shift ;;
--)
@@ -331,6 +371,7 @@ function do_stop {
$stop_api == "" &&
$stop_compute == "" &&
$stop_workbench == "" &&
+ $stop_nameserver == "" &&
$stop_keep == "" ]]
then
stop_doc=doc_server
@@ -338,10 +379,11 @@ function do_stop {
stop_api=api_server
stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '`
stop_workbench=workbench_server
+ stop_nameserver="skydock skydns"
stop_keep="keep_server_0 keep_server_1"
fi
- $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_keep \
+ $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_nameserver $stop_keep \
2>/dev/null
}
diff --git a/docker/slurm/slurm.conf.in b/docker/slurm/slurm.conf.in
index 6957e6f..7312a0e 100644
--- a/docker/slurm/slurm.conf.in
+++ b/docker/slurm/slurm.conf.in
@@ -53,8 +53,8 @@ PartitionName=DEFAULT MaxTime=INFINITE State=UP
PartitionName=compute Default=YES Shared=yes
#PartitionName=sysadmin Hidden=YES Shared=yes
-NodeName=compute[0-63]
+NodeName=compute[0-1]
#NodeName=compute0 RealMemory=6967 Weight=6967
-PartitionName=compute Nodes=compute[0-63]
-PartitionName=crypto Nodes=compute[0-63]
+PartitionName=compute Nodes=compute[0-1]
+PartitionName=crypto Nodes=compute[0-1]
commit 652aec8c581bb6c38bae2c98f0611b142f855e4b
Author: Ward Vandewege <ward at curoverse.com>
Date: Thu Jul 10 16:34:12 2014 -0400
First set of changes to add crunch-dispatch and compute nodes. This is not complete yet.
refs #3219
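A rough sketch of how the new Makefile targets introduced below might
be invoked to build the images (running make from the docker directory
with -f is an assumption; the target names come from the diff):

    # Build the slurm base image, the compute image layered on top of
    # it, and rebuild the api image so it picks up slurm, munge and
    # crunch-dispatch.
    make -f build_tools/Makefile slurm-image compute-image api-image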
diff --git a/docker/api/Dockerfile b/docker/api/Dockerfile
index 99a0b4c..8f69b80 100644
--- a/docker/api/Dockerfile
+++ b/docker/api/Dockerfile
@@ -9,10 +9,24 @@ MAINTAINER Tim Pierce <twp at curoverse.com>
# check a git repo for crunch scripts.
#
RUN apt-get update && \
- apt-get -q -y install procps postgresql postgresql-server-dev-9.1 apache2 \
+ apt-get -q -y install procps postgresql postgresql-server-dev-9.1 apache2 slurm-llnl munge \
supervisor && \
git clone --bare git://github.com/curoverse/arvados.git /var/cache/git/arvados.git
+# For crunch-dispatch
+#ADD apt.arvados.org.list /etc/apt/sources.list.d/
+
+#RUN apt-key adv --keyserver pgp.mit.edu --recv 1078ECD7 && apt-get update && \
+#RUN apt-get -q -y install libjson-perl libwww-perl libio-socket-ssl-perl libipc-system-simple-perl slurm-llnl munge
+# apt-get -q -y install arvados-src libjson-perl libwww-perl libio-socket-ssl-perl libipc-system-simple-perl slurm-llnl munge
+
+ADD munge.key /etc/munge/
+RUN chown munge:munge /etc/munge/munge.key
+ADD generated/slurm.conf /etc/slurm-llnl/
+
+RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
+# /for crunch-dispatch
+
RUN /bin/mkdir -p /usr/src/arvados/services
ADD generated/api.tar.gz /usr/src/arvados/services/
@@ -50,6 +64,7 @@ RUN a2dissite default && \
# Supervisor.
ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
ADD ssh.sh /usr/local/bin/ssh.sh
+ADD crunch-dispatch-run.sh /usr/local/bin/crunch-dispatch-run.sh
ADD apache2_foreground.sh /etc/apache2/foreground.sh
# Start the supervisor.
diff --git a/docker/api/apt.arvados.org.list b/docker/api/apt.arvados.org.list
new file mode 100644
index 0000000..7eb8716
--- /dev/null
+++ b/docker/api/apt.arvados.org.list
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ wheezy main
diff --git a/docker/api/crunch-dispatch-run.sh b/docker/api/crunch-dispatch-run.sh
new file mode 100755
index 0000000..c16a433
--- /dev/null
+++ b/docker/api/crunch-dispatch-run.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+set -e
+export PATH="$PATH":/usr/local/arvados/src/services/crunch
+export PERLLIB=/usr/local/arvados/src/sdk/perl/lib
+export ARVADOS_API_HOST=qr1hi.arvadosapi.com
+export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
+
+if [[ ! -e $CRUNCH_DISPATCH_LOCKFILE ]]; then
+ touch $CRUNCH_DISPATCH_LOCKFILE
+fi
+
+export CRUNCH_JOB_BIN=/usr/local/arvados/src/services/crunch/crunch-job
+export HOME=`pwd`
+fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=production
+/usr/local/rvm/bin/rvm-exec default bundle install
+exec /usr/local/rvm/bin/rvm-exec default bundle exec ./script/crunch-dispatch.rb 2>&1
+
diff --git a/docker/api/munge.key b/docker/api/munge.key
new file mode 100644
index 0000000..34036a0
Binary files /dev/null and b/docker/api/munge.key differ
diff --git a/docker/api/slurm.conf.in b/docker/api/slurm.conf.in
new file mode 100644
index 0000000..6957e6f
--- /dev/null
+++ b/docker/api/slurm.conf.in
@@ -0,0 +1,60 @@
+
+ControlMachine=api
+#SlurmUser=slurmd
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=/etc/slurm-llnl/slurm-key.pem
+#JobCredentialPublicCertificate=/etc/slurm-llnl/slurm-cert.pem
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerType=sched/builtin
+SchedulerPort=7321
+#SchedulerRootFilter=
+#SelectType=select/linear
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+FastSchedule=1
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#JobAcctLogfile=
+#JobAcctFrequency=
+#
+# COMPUTE NODES
+NodeName=DEFAULT
+# CPUs=8 State=UNKNOWN RealMemory=6967 Weight=6967
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+PartitionName=compute Default=YES Shared=yes
+#PartitionName=sysadmin Hidden=YES Shared=yes
+
+NodeName=compute[0-63]
+#NodeName=compute0 RealMemory=6967 Weight=6967
+
+PartitionName=compute Nodes=compute[0-63]
+PartitionName=crypto Nodes=compute[0-63]
diff --git a/docker/api/supervisor.conf b/docker/api/supervisor.conf
index a4f9129..9c4a6a5 100644
--- a/docker/api/supervisor.conf
+++ b/docker/api/supervisor.conf
@@ -10,3 +10,15 @@ command=/usr/lib/postgresql/9.1/bin/postgres -D /var/lib/postgresql/9.1/main -c
[program:apache2]
command=/etc/apache2/foreground.sh
stopsignal=6
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+[program:crunch-dispatch]
+user=root
+command=/usr/local/bin/crunch-dispatch-run.sh
diff --git a/docker/arvdock b/docker/arvdock
index f2edc19..544c641 100755
--- a/docker/arvdock
+++ b/docker/arvdock
@@ -7,15 +7,18 @@ if [[ "$DOCKER" == "" ]]; then
DOCKER=`which docker`
fi
+COMPUTE_COUNTER=0
+
function usage {
echo >&2
echo >&2 "usage: $0 (start|stop|restart|test) [options]"
echo >&2
echo >&2 "$0 start/stop/restart options:"
- echo >&2 " -d [port], --doc[=port] Documentation server (default port 9898)"
- echo >&2 " -w [port], --workbench[=port] Workbench server (default port 9899)"
- echo >&2 " -s [port], --sso[=port] SSO server (default port 9901)"
- echo >&2 " -a [port], --api[=port] API server (default port 9900)"
+ echo >&2 " -d[port], --doc[=port] Documentation server (default port 9898)"
+ echo >&2 " -w[port], --workbench[=port] Workbench server (default port 9899)"
+ echo >&2 " -s[port], --sso[=port] SSO server (default port 9901)"
+ echo >&2 " -a[port], --api[=port] API server (default port 9900)"
+ echo >&2 " -c[count], --compute[=count] Compute nodes (default starts 2)"
echo >&2 " -k, --keep Keep servers"
echo >&2 " --ssh Enable SSH access to server containers"
echo >&2 " -h, --help Display this help and exit"
@@ -39,7 +42,15 @@ function start_container {
fi
if [[ "$2" != '' ]]; then
local name="$2"
- args="$args --name $name"
+ if [[ "$name" == "api_server" ]]; then
+ args="$args --hostname api --name $name"
+ elif [[ "$name" == "compute" ]]; then
+ name=$name$COMPUTE_COUNTER
+ args="$args --hostname compute$COMPUTE_COUNTER --name $name"
+ let COMPUTE_COUNTER=$(($COMPUTE_COUNTER + 1))
+ else
+ args="$args --name $name"
+ fi
fi
if [[ "$3" != '' ]]; then
local volume="$3"
@@ -130,12 +141,13 @@ function do_start {
local start_doc=false
local start_sso=false
local start_api=false
+ local start_compute=false
local start_workbench=false
local start_keep=false
# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
- local TEMP=`getopt -o d::s::a::w::kh \
- --long doc::,sso::,api::,workbench::,keep,help,ssh \
+ local TEMP=`getopt -o d::s::a::c::w::kh \
+ --long doc::,sso::,api::,compute::,workbench::,keep,help,ssh \
-n "$0" -- "$@"`
if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -164,6 +176,12 @@ function do_start {
*) start_api=$2; shift 2 ;;
esac
;;
+ -c | --compute)
+ case "$2" in
+ "") start_compute=2; shift 2 ;;
+ *) start_compute=$2; shift 2 ;;
+ esac
+ ;;
-w | --workbench)
case "$2" in
"") start_workbench=9899; shift 2 ;;
@@ -194,12 +212,14 @@ function do_start {
if [[ $start_doc == false &&
$start_sso == false &&
$start_api == false &&
+ $start_compute == false &&
$start_workbench == false &&
$start_keep == false ]]
then
start_doc=9898
start_sso=9901
start_api=9900
+ start_compute=2
start_workbench=9899
start_keep=true
fi
@@ -214,6 +234,13 @@ function do_start {
start_container "$start_api:443" "api_server" '' "sso_server:sso" "arvados/api"
fi
+ if [[ $start_compute != false ]]
+ then
+ for i in `seq 0 $(($start_compute - 1))`; do
+ start_container "" "compute" '' "api_server:api" "arvados/compute"
+ done
+ fi
+
if [[ $start_keep != false ]]
then
# create `keep_volumes' array with a list of keep mount points
@@ -258,12 +285,13 @@ function do_stop {
local stop_doc=""
local stop_sso=""
local stop_api=""
+ local stop_compute=""
local stop_workbench=""
local stop_keep=""
# NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
- local TEMP=`getopt -o d::s::a::w::kh \
- --long doc::,sso::,api::,workbench::,keep,help,ssh \
+ local TEMP=`getopt -o dsacwkh \
+ --long doc,sso,api,compute,workbench,keep,help \
-n "$0" -- "$@"`
if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -275,18 +303,17 @@ function do_stop {
do
case $1 in
-d | --doc)
- stop_doc=doc_server ; shift 2 ;;
+ stop_doc=doc_server ; shift ;;
-s | --sso)
- stop_sso=sso_server ; shift 2 ;;
+ stop_sso=sso_server ; shift ;;
-a | --api)
- stop_api=api_server ; shift 2 ;;
+ stop_api=api_server ; shift ;;
+ -c | --compute)
+ stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
-w | --workbench)
- stop_workbench=workbench_server ; shift 2 ;;
+ stop_workbench=workbench_server ; shift ;;
-k | --keep )
stop_keep="keep_server_0 keep_server_1" ; shift ;;
- --ssh)
- shift
- ;;
--)
shift
break
@@ -302,17 +329,19 @@ function do_stop {
if [[ $stop_doc == "" &&
$stop_sso == "" &&
$stop_api == "" &&
+ $stop_compute == "" &&
$stop_workbench == "" &&
$stop_keep == "" ]]
then
stop_doc=doc_server
stop_sso=sso_server
stop_api=api_server
+ stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '`
stop_workbench=workbench_server
stop_keep="keep_server_0 keep_server_1"
fi
- $DOCKER stop $stop_doc $stop_sso $stop_api $stop_workbench $stop_keep \
+ $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_keep \
2>/dev/null
}
diff --git a/docker/build_tools/Makefile b/docker/build_tools/Makefile
index 267e244..e2fd50d 100644
--- a/docker/build_tools/Makefile
+++ b/docker/build_tools/Makefile
@@ -1,4 +1,4 @@
-all: api-image doc-image workbench-image keep-image sso-image
+all: api-image compute-image doc-image workbench-image keep-image sso-image
# `make clean' removes the files generated in the build directory
# but does not remove any docker images generated in previous builds
@@ -24,11 +24,15 @@ BUILD = build/.buildstamp
BASE_DEPS = base/Dockerfile $(BASE_GENERATED)
+SLURM_DEPS = slurm/Dockerfile $(SLURM_GENERATED)
+
JOBS_DEPS = jobs/Dockerfile
JAVA_BWA_SAMTOOLS_DEPS = java-bwa-samtools/Dockerfile
-API_DEPS = api/Dockerfile $(API_GENERATED)
+API_DEPS = api/* $(API_GENERATED)
+
+COMPUTE_DEPS = compute/* $(COMPUTE_GENERATED)
DOC_DEPS = doc/Dockerfile doc/apache2_vhost
@@ -43,12 +47,15 @@ BCBIO_NEXTGEN_DEPS = bcbio-nextgen/Dockerfile
BASE_GENERATED = base/generated/arvados.tar.gz
+SLURM_GENERATED = slurm/generated/*
+
API_GENERATED = \
api/generated/apache2_vhost \
api/generated/config_databases.sh \
api/generated/database.yml \
api/generated/omniauth.rb \
api/generated/application.yml \
+ api/generated/slurm.conf \
api/generated/superuser_token
API_GENERATED_IN = \
@@ -57,8 +64,15 @@ API_GENERATED_IN = \
api/database.yml.in \
api/omniauth.rb.in \
api/application.yml.in \
+ api/slurm.conf.in \
api/superuser_token.in
+SLURM_GENERATED = \
+ slurm/generated/slurm.conf
+
+SLURM_GENERATED_IN = \
+ slurm/slurm.conf.in
+
WORKBENCH_GENERATED = \
workbench/generated/apache2_vhost \
workbench/generated/application.yml
@@ -88,6 +102,10 @@ $(BUILD):
cd build/sdk/ruby && gem build arvados.gemspec
touch build/.buildstamp
+$(SLURM_GENERATED): config.yml $(BUILD)
+ $(CONFIG_RB)
+ mkdir -p slurm/generated
+
$(BASE_GENERATED): config.yml $(BUILD)
$(CONFIG_RB)
mkdir -p base/generated
@@ -119,6 +137,12 @@ api-image: passenger-image $(BUILD) $(API_DEPS)
$(DOCKER_BUILD) -t arvados/api api
date >api-image
+slurm-image: base-image $(SLURM_DEPS)
+
+compute-image: slurm-image $(BUILD) $(COMPUTE_DEPS)
+ $(DOCKER_BUILD) -t arvados/compute compute
+ date >compute-image
+
doc-image: base-image $(BUILD) $(DOC_DEPS)
mkdir -p doc/generated
tar -czf doc/generated/doc.tar.gz -C build doc
@@ -165,6 +189,10 @@ passenger-image: base-image
$(DOCKER_BUILD) -t arvados/passenger passenger
date >passenger-image
+slurm-image: base-image $(SLURM_DEPS)
+ $(DOCKER_BUILD) -t arvados/slurm slurm
+ date >slurm-image
+
base-image: debian-image $(BASE_DEPS)
$(DOCKER_BUILD) -t arvados/base base
date >base-image
diff --git a/docker/compute/Dockerfile b/docker/compute/Dockerfile
new file mode 100644
index 0000000..8c403b5
--- /dev/null
+++ b/docker/compute/Dockerfile
@@ -0,0 +1,15 @@
+# Arvados compute node Docker container.
+
+FROM arvados/slurm
+MAINTAINER Ward Vandewege <ward at curoverse.com>
+
+RUN apt-get update && apt-get -q -y install supervisor
+
+RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
+
+# Supervisor.
+ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
+ADD ssh.sh /usr/local/bin/ssh.sh
+
+# Start the supervisor.
+CMD ["/usr/bin/supervisord", "-n"]
diff --git a/docker/compute/ssh.sh b/docker/compute/ssh.sh
new file mode 100755
index 0000000..664414b
--- /dev/null
+++ b/docker/compute/ssh.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+echo $ENABLE_SSH
+
+# Start ssh daemon if requested via the ENABLE_SSH env variable
+if [[ ! "$ENABLE_SSH" =~ (0|false|no|f|^$) ]]; then
+echo "STARTING"
+ /etc/init.d/ssh start
+fi
+
diff --git a/docker/compute/supervisor.conf b/docker/compute/supervisor.conf
new file mode 100644
index 0000000..6563b54
--- /dev/null
+++ b/docker/compute/supervisor.conf
@@ -0,0 +1,14 @@
+[program:ssh]
+user=root
+command=/usr/local/bin/ssh.sh
+startsecs=0
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+
diff --git a/docker/slurm/Dockerfile b/docker/slurm/Dockerfile
new file mode 100644
index 0000000..cfd63fc
--- /dev/null
+++ b/docker/slurm/Dockerfile
@@ -0,0 +1,11 @@
+# Slurm node Docker container.
+
+FROM arvados/base
+MAINTAINER Ward Vandewege <ward at curoverse.com>
+
+RUN apt-get update && apt-get -q -y install slurm-llnl munge
+
+ADD munge.key /etc/munge/
+RUN chown munge:munge /etc/munge/munge.key
+ADD generated/slurm.conf /etc/slurm-llnl/
+
diff --git a/docker/slurm/munge.key b/docker/slurm/munge.key
new file mode 100644
index 0000000..34036a0
Binary files /dev/null and b/docker/slurm/munge.key differ
diff --git a/docker/slurm/slurm.conf.in b/docker/slurm/slurm.conf.in
new file mode 100644
index 0000000..6957e6f
--- /dev/null
+++ b/docker/slurm/slurm.conf.in
@@ -0,0 +1,60 @@
+
+ControlMachine=api
+#SlurmUser=slurmd
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=/etc/slurm-llnl/slurm-key.pem
+#JobCredentialPublicCertificate=/etc/slurm-llnl/slurm-cert.pem
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerType=sched/builtin
+SchedulerPort=7321
+#SchedulerRootFilter=
+#SelectType=select/linear
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+FastSchedule=1
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#JobAcctLogfile=
+#JobAcctFrequency=
+#
+# COMPUTE NODES
+NodeName=DEFAULT
+# CPUs=8 State=UNKNOWN RealMemory=6967 Weight=6967
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+PartitionName=compute Default=YES Shared=yes
+#PartitionName=sysadmin Hidden=YES Shared=yes
+
+NodeName=compute[0-63]
+#NodeName=compute0 RealMemory=6967 Weight=6967
+
+PartitionName=compute Nodes=compute[0-63]
+PartitionName=crypto Nodes=compute[0-63]
diff --git a/docker/slurm/supervisor.conf b/docker/slurm/supervisor.conf
new file mode 100644
index 0000000..6563b54
--- /dev/null
+++ b/docker/slurm/supervisor.conf
@@ -0,0 +1,14 @@
+[program:ssh]
+user=root
+command=/usr/local/bin/ssh.sh
+startsecs=0
+
+[program:munge]
+user=root
+command=/etc/init.d/munge start
+
+[program:slurm]
+user=root
+command=/etc/init.d/slurm-llnl start
+
+
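Once skydock has registered the running containers with skydns, the
slurm controller on the api container should be able to resolve the
compute node hostnames via the docker bridge address. A minimal check,
assuming skydock's <name>.<image>.<environment>.<domain> naming and
that the host and sinfo tools are available inside the container:

    # From inside the api_server container:
    host compute0.compute.dev.arvados 172.17.42.1  # resolve via skydns
    sinfo -N                                       # should list compute0 and compute1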
-----------------------------------------------------------------------
hooks/post-receive
--