[ARVADOS] created: 1.3.0-2825-g70c33e519
Git user
git@public.arvados.org
Tue Jul 28 17:40:12 UTC 2020
at 70c33e51905a84c3dcb61c88e5ec5c6479599c28 (commit)
commit 70c33e51905a84c3dcb61c88e5ec5c6479599c28
Author: Ward Vandewege <ward@curii.com>
Date: Tue Jul 28 13:37:46 2020 -0400
16616: move the compute-image packer templates into the arvados
repository. Update documentation.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>
diff --git a/doc/_config.yml b/doc/_config.yml
index be52a204c..bbab3f307 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -220,9 +220,10 @@ navbar:
- install/install-shell-server.html.textile.liquid
- install/install-webshell.html.textile.liquid
- Containers API:
- - install/crunch2-slurm/install-compute-node.html.textile.liquid
- install/install-jobs-image.html.textile.liquid
- - install/install-dispatch-cloud.html.textile.liquid
+ - install/crunch2-cloud/install-compute-node.html.textile.liquid
+ - install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
+ - install/crunch2-slurm/install-compute-node.html.textile.liquid
- install/crunch2-slurm/install-dispatch.html.textile.liquid
- install/crunch2-slurm/install-test.html.textile.liquid
- External dependencies:
diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
new file mode 100644
index 000000000..4659ccb1e
--- /dev/null
+++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
@@ -0,0 +1,186 @@
+---
+layout: default
+navsection: installguide
+title: Build a cloud compute node image
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+{% include 'notebox_begin_warning' %}
+arvados-dispatch-cloud is only relevant for cloud installations. Skip this section if you are installing an on premises cluster that will spool jobs to Slurm.
+{% include 'notebox_end' %}
+
+# "Introduction":#introduction
+# "Create an SSH keypair":#sshkeypair
+# "The build script":#building
+# "Build an Azure image":#azure
+# "Build an AWS image":#aws
+
+h2(#introduction). Introduction
+
+This page describes how to build a compute node image that can be used to run containers dispatched by Arvados in the cloud.
+
+Packer templates for AWS and Azure are provided with Arvados. To use them, the following are needed:
+
+* "Packer":https://www.packer.io/
+* credentials for your cloud account
+* configuration details for your cloud account
+
+
+h2(#sshkeypair). Create an SSH keypair
+
+@arvados-dispatch-cloud@ communicates with the compute nodes via SSH. To do this securely, an SSH keypair is needed.
+
+Generate an SSH keypair with no passphrase. The private key needs to be stored in the cluster configuration file (see @Containers/DispatchPrivateKey@) for use by @arvados-dispatch-cloud@, as described in the "next section":install-dispatch-cloud.html#update-config. The public key will be baked into the compute node images, see the cloud-specific documentation below.
+
+<notextile>
+<pre><code>~$ <span class="userinput">ssh-keygen -N '' -f ~/.ssh/id_dispatcher</span>
+Generating public/private rsa key pair.
+Your identification has been saved in /home/user/.ssh/id_dispatcher.
+Your public key has been saved in /home/user/.ssh/id_dispatcher.pub.
+The key fingerprint is:
+[...]
+~$ <span class="userinput">cat ~/.ssh/id_dispatcher</span>
+-----BEGIN RSA PRIVATE KEY-----
+MIIEpQIBAAKCAQEAqXoCzcOBkFQ7w4dvXf9B++1ctgZRqEbgRYL3SstuMV4oawks
+ttUuxJycDdsPmeYcHsKo8vsEZpN6iYsX6ZZzhkO5nEayUTU8sBjmg1ZCTo4QqKXr
+...
+oFyAjVoexx0RBcH6BveTfQtJKbktP1qBO4mXo2dP0cacuZEtlAqW9Eb06Pvaw/D9
+foktmqOY8MyctzFgXBpGTxPliGjqo8OkrOyQP2g+FL7v+Km31Xs61P8=
+-----END RSA PRIVATE KEY-----
+</code></pre>
+</notextile>
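+
+The corresponding *public* key file, @~/.ssh/id_dispatcher.pub@, is the file to pass to the build script's @--public-key-file@ argument below, so that it can be baked into the image. For example:
+
+<notextile>
+<pre><code>~$ <span class="userinput">cat ~/.ssh/id_dispatcher.pub</span>
+ssh-rsa AAAA[...]
+</code></pre>
+</notextile>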
+
+h2(#building). The build script
+
+The necessary files are located in the @arvados/tools/compute-images@ directory in the source tree. A build script is provided to generate the image. The @--help@ argument lists all available options:
+
+<notextile><pre><code>~$ <span class="userinput">./build.sh --help</span>
+build.sh: Build cloud images for arvados-dispatch-cloud
+
+Syntax:
+ build.sh [options]
+
+Options:
+
+ --json-file (required)
+ Path to the packer json file
+ --arvados-cluster-id (required)
+ The ID of the Arvados cluster, e.g. zzzzz
+ --aws-profile (default: false)
+ AWS profile to use (valid profile from ~/.aws/config)
+ --aws-secrets-file (default: false, required if building for AWS)
+ AWS secrets file which will be sourced from this script
+ --aws-source-ami (default: false, required if building for AWS)
+ The AMI to use as base for building the images
+ --aws-region (default: us-east-1)
+ The AWS region to use for building the images
+ --aws-vpc-id (optional)
+ VPC id for AWS, otherwise packer will pick the default one
+ --aws-subnet-id
+ Subnet id for AWS, otherwise packer will pick the default one for the VPC
+ --gcp-project-id (default: false, required if building for GCP)
+ GCP project id
+ --gcp-account-file (default: false, required if building for GCP)
+ GCP account file
+ --gcp-zone (default: us-central1-f)
+ GCP zone
+ --azure-secrets-file (default: false, required if building for Azure)
+ Azure secrets file which will be sourced from this script
+ --azure-resource-group (default: false, required if building for Azure)
+ Azure resource group
+ --azure-storage-account (default: false, required if building for Azure)
+ Azure storage account
+ --azure-location (default: false, required if building for Azure)
+ Azure location, e.g. centralus, eastus, westeurope
+ --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
+ Azure SKU image to use
+ --ssh_user (default: packer)
+ The user packer will use to log into the image
+ --domain (default: arvadosapi.com)
+ The domain part of the FQDN for the cluster
+ --resolver (default: 8.8.8.8)
+ The dns resolver for the machine
+ --reposuffix (default: unset)
+ Set this to "-dev" to track the unstable/dev Arvados repositories
+ --public-key-file (required)
+ Path to the public key file that a-d-c will use to log into the compute node
+ --debug
+ Output debug information (default: false)
+</code></pre></notextile>
+
+h2(#azure). Build an Azure image
+
+<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
+ --arvados-cluster-id ClusterID \
+ --azure-resource-group ResourceGroup \
+ --azure-storage-account StorageAccount \
+ --azure-location AzureRegion \
+ --azure-sku AzureSKU \
+ --azure-secrets-file AzureSecretsFilePath \
+ --resolver ResolverIP \
+ --public-key-file ArvadosDispatchCloudPublicKeyPath
+</span>
+</code></pre></notextile>
+
+For @ClusterID@, fill in your cluster ID. The @ResourceGroup@, @StorageAccount@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
+
+@AzureSecretsFilePath@ should be replaced with the path to a shell script that loads the Azure secrets with sufficient permissions to create the image. The file would look like this:
+
+<notextile><pre><code>export ARM_CLIENT_ID=...
+export ARM_CLIENT_SECRET=...
+export ARM_SUBSCRIPTION_ID=...
+export ARM_TENANT_ID=...
+</code></pre></notextile>
+
+These secrets can be generated from the Azure portal, or with the Azure CLI using a command like this:
+
+<notextile><pre><code>~$ <span class="userinput">az ad sp create-for-rbac --name Packer --password ...</span>
+</code></pre></notextile>
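+
+The JSON printed by that command typically includes @appId@, @password@ and @tenant@ fields; in the usual mapping for these Azure credentials, @appId@ corresponds to @ARM_CLIENT_ID@, @password@ to @ARM_CLIENT_SECRET@ and @tenant@ to @ARM_TENANT_ID@. The subscription id can be looked up separately, for example:
+
+<notextile><pre><code>~$ <span class="userinput">az account show --query id --output tsv</span>
+</code></pre></notextile>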
+
+@ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
+
+Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
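+
+A quick way to check a resolver is to query it directly for one of those hostnames, substituting the same @ResolverIP@ and hostname placeholders used on this page (this sketch assumes @dig@ is available, and the answer should be your internal IP address):
+
+<notextile><pre><code>~$ <span class="userinput">dig +short @ResolverIP ClusterID.example.com</span>
+10.20.30.40
+</code></pre></notextile>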
+
+Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
+
+<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
+10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
+10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
+</code></pre></notextile>
+
+Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script. A minimal sketch of such an addition to @scripts/base.sh@, reusing the example addresses above, could look like this:
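+
+<notextile><pre><code># Hypothetical addition to scripts/base.sh: pin the API server and keepstore hostnames.
+# The hostnames and addresses below are the example placeholders from this page.
+echo "10.20.30.40 ClusterID.example.com"       | $SUDO tee -a /etc/hosts
+echo "10.20.30.41 keep1.ClusterID.example.com" | $SUDO tee -a /etc/hosts
+echo "10.20.30.42 keep2.ClusterID.example.com" | $SUDO tee -a /etc/hosts
+</code></pre></notextile>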
+
+h2(#aws). Build an AWS image
+
+<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-aws.json \
+ --arvados-cluster-id ClusterID \
+ --aws-profile AWSProfile \
+ --aws-source-ami AMI \
+ --aws-vpc-id VPC \
+ --aws-subnet-id Subnet \
+ --ssh_user admin \
+ --resolver ResolverIP \
+ --public-key-file ArvadosDispatchCloudPublicKeyPath
+</span>
+</code></pre></notextile>
+
+For @ClusterID@, fill in your cluster ID. The @VPC@ and @Subnet@ should be configured for where you want the compute image to be generated and stored. The @AMI@ is the identifier for the base image to be used. Current AMIs are maintained by "Debian":https://wiki.debian.org/Cloud/AmazonEC2Image/Buster and "Ubuntu":https://cloud-images.ubuntu.com/locator/ec2/.
+
+@AWSProfile@ should be replaced with the name of an AWS profile with sufficient permissions to create the image.
+
+@ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
+
+Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
+
+Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
+
+<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
+10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
+10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
+</code></pre></notextile>
+
+Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, as sketched in the Azure section above.
diff --git a/doc/install/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
similarity index 82%
rename from doc/install/install-dispatch-cloud.html.textile.liquid
rename to doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
index 43cad2db8..3d6352ccb 100644
--- a/doc/install/install-dispatch-cloud.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
@@ -10,7 +10,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
{% include 'notebox_begin_warning' %}
-arvados-dispatch-cloud is only relevant for cloud installations. Skip this section if you are installing a on premise cluster that will spool jobs to Slurm.
+arvados-dispatch-cloud is only relevant for cloud installations. Skip this section if you are installing an on premises cluster that will spool jobs to Slurm.
{% include 'notebox_end' %}
# "Introduction":#introduction
@@ -27,53 +27,11 @@ The cloud dispatch service is for running containers on cloud VMs. It works with
The cloud dispatch service can run on any node that can connect to the Arvados API service, the cloud provider's API, and the SSH service on cloud VMs. It is not resource-intensive, so you can run it on the API server node.
-h2(#create-image). Create compute node VM image and configure resolver
-
-Set up a VM following the steps "to set up a compute node":crunch2-slurm/install-compute-node.html
-
-Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver and configuring the compute VMs to use that resolver, or by hardcoding the services in the @/etc/hosts@ file. For example:
-
-<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
-10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
-10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
-</code></pre></notextile>
-
-Once the VM is fully configured, create a reusable VM image from it and make note of the image id.
-
h2(#update-config). Update config.yml
-h3. Create a private key
-
-Generate an SSH private key with no passphrase. Save it in the cluster configuration file (see @PrivateKey@ in the example below).
-
-<notextile>
-<pre><code>~$ <span class="userinput">ssh-keygen -N '' -f ~/.ssh/id_dispatcher</span>
-Generating public/private rsa key pair.
-Your identification has been saved in /home/user/.ssh/id_dispatcher.
-Your public key has been saved in /home/user/.ssh/id_dispatcher.pub.
-The key fingerprint is:
-[...]
-~$ <span class="userinput">cat ~/.ssh/id_dispatcher</span>
------BEGIN RSA PRIVATE KEY-----
-MIIEpQIBAAKCAQEAqXoCzcOBkFQ7w4dvXf9B++1ctgZRqEbgRYL3SstuMV4oawks
-ttUuxJycDdsPmeYcHsKo8vsEZpN6iYsX6ZZzhkO5nEayUTU8sBjmg1ZCTo4QqKXr
-...
-oFyAjVoexx0RBcH6BveTfQtJKbktP1qBO4mXo2dP0cacuZEtlAqW9Eb06Pvaw/D9
-foktmqOY8MyctzFgXBpGTxPliGjqo8OkrOyQP2g+FL7v+Km31Xs61P8=
------END RSA PRIVATE KEY-----
-</code></pre>
-</notextile>
-
-You can delete the key files after you have copied the private key to your configuration file.
-
-<notextile>
-<pre><code>~$ <span class="userinput">rm ~/.ssh/id_dispatcher ~/.ssh/id_dispatcher.pub</span>
-</code></pre>
-</notextile>
-
h3. Configure CloudVMs
-Add or update the following portions of your cluster configuration file, @config.yml@. Refer to "config.defaults.yml":{{site.baseurl}}/admin/config.html for information about additional configuration options.
+Add or update the following portions of your cluster configuration file, @config.yml@. Refer to "config.defaults.yml":{{site.baseurl}}/admin/config.html for information about additional configuration options. The @DispatchPrivateKey@ should be the *private* key generated in "the previous section":install-compute-node.html#sshkeypair.
<notextile>
<pre><code> Services:
diff --git a/doc/install/crunch2-slurm/install-compute-node.html.textile.liquid b/doc/install/crunch2-slurm/install-compute-node.html.textile.liquid
index e93332c92..f25d1f0a3 100644
--- a/doc/install/crunch2-slurm/install-compute-node.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-compute-node.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: installguide
-title: Set up a compute node
+title: Set up a Slurm compute node
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,6 +9,10 @@ Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
+{% include 'notebox_begin_warning' %}
+crunch-dispatch-slurm is only relevant for on premises clusters that will spool jobs to Slurm. Skip this section if you are installing a cloud cluster.
+{% include 'notebox_end' %}
+
# "Introduction":#introduction
# "Set up Docker":#docker
# "Update fuse.conf":#fuse
@@ -20,10 +24,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
h2(#introduction). Introduction
-This page describes how to configure a compute node so that it can be used to run containers dispatched by Arvados.
-
-* If you are using the cloud dispatcher, apply these step and then save a compute node virtual machine image. The virtual machine image id will go in @config.yml@.
-* If you are using SLURM on a static custer, these steps must be duplicated on every compute node, preferrably using a devops tool such as Puppet.
+This page describes how to configure a compute node so that it can be used to run containers dispatched by Arvados, with Slurm on a static cluster. These steps must be duplicated on every compute node.
h2(#docker). Set up Docker
diff --git a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
index 300871763..a9689e9ac 100644
--- a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: installguide
-title: Install the SLURM dispatcher
+title: Install the Slurm dispatcher
...
{% comment %}
@@ -11,7 +11,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
{% include 'notebox_begin_warning' %}
-crunch-dispatch-slurm is only relevant for on premise clusters that will spool jobs to Slurm. Skip this section if you are installing a cloud cluster.
+crunch-dispatch-slurm is only relevant for on premises clusters that will spool jobs to Slurm. Skip this section if you are installing a cloud cluster.
{% include 'notebox_end' %}
# "Introduction":#introduction
@@ -22,9 +22,9 @@ crunch-dispatch-slurm is only relevant for on premise clusters that will spool j
h2(#introduction). Introduction
-This assumes you already have a SLURM cluster, and have "set up all of your compute nodes":install-compute-node.html . For information on installing SLURM, see "this install guide":https://slurm.schedmd.com/quickstart_admin.html
+This assumes you already have a Slurm cluster, and have "set up all of your compute nodes":install-compute-node.html . For information on installing Slurm, see "this install guide":https://slurm.schedmd.com/quickstart_admin.html
-The Arvados SLURM dispatcher can run on any node that can submit requests to both the Arvados API server and the SLURM controller (via @sbatch@). It is not resource-intensive, so you can run it on the API server node.
+The Arvados Slurm dispatcher can run on any node that can submit requests to both the Arvados API server and the Slurm controller (via @sbatch@). It is not resource-intensive, so you can run it on the API server node.
h2(#update-config). Update config.yml (optional)
@@ -44,7 +44,7 @@ crunch-dispatch-slurm polls the API server periodically for new containers to ru
h3(#ReserveExtraRAM). Containers.ReserveExtraRAM: Extra RAM for jobs
-Extra RAM to reserve (in bytes) on each SLURM job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accomodate each container's @crunch-run@ and @arv-mount@ processes.
+Extra RAM to reserve (in bytes) on each Slurm job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accommodate each container's @crunch-run@ and @arv-mount@ processes.
Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB@, @EB@, @EiB@ (where @KB@ is 10[^3^], @KiB@ is 2[^10^], @MB@ is 10[^6^], @MiB@ is 2[^20^] and so forth).
@@ -56,7 +56,7 @@ Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB
h3(#MinRetryPeriod). Containers.MinRetryPeriod: Rate-limit repeated attempts to start containers
-If SLURM is unable to run a container, the dispatcher will submit it again after the next PollPeriod. If PollPeriod is very short, this can be excessive. If MinRetryPeriod is set, the dispatcher will avoid submitting the same container to SLURM more than once in the given time span.
+If Slurm is unable to run a container, the dispatcher will submit it again after the next PollPeriod. If PollPeriod is very short, this can be excessive. If MinRetryPeriod is set, the dispatcher will avoid submitting the same container to Slurm more than once in the given time span.
<notextile>
<pre> Containers:
@@ -64,7 +64,7 @@ If SLURM is unable to run a container, the dispatcher will submit it again after
</pre>
</notextile>
-h3(#KeepServiceURIs). Containers.SLURM.SbatchEnvironmentVariables
+h3(#KeepServiceURIs). Containers.Slurm.SbatchEnvironmentVariables
Some Arvados installations run a local keepstore on each compute node to handle all Keep traffic. To override Keep service discovery and access the local keep server instead of the global servers, set ARVADOS_KEEP_SERVICES in SbatchEnvironmentVariables:
@@ -76,11 +76,11 @@ Some Arvados installations run a local keepstore on each compute node to handle
</code></pre>
</notextile>
-h3(#PrioritySpread). Containers.SLURM.PrioritySpread
+h3(#PrioritySpread). Containers.Slurm.PrioritySpread
-crunch-dispatch-slurm adjusts the "nice" values of its SLURM jobs to ensure containers are prioritized correctly relative to one another. This option tunes the adjustment mechanism.
-* If non-Arvados jobs run on your SLURM cluster, and your Arvados containers are waiting too long in the SLURM queue because their "nice" values are too high for them to compete with other SLURM jobs, you should use a smaller PrioritySpread value.
-* If you have an older SLURM system that limits nice values to 10000, a smaller @PrioritySpread@ can help avoid reaching that limit.
+crunch-dispatch-slurm adjusts the "nice" values of its Slurm jobs to ensure containers are prioritized correctly relative to one another. This option tunes the adjustment mechanism.
+* If non-Arvados jobs run on your Slurm cluster, and your Arvados containers are waiting too long in the Slurm queue because their "nice" values are too high for them to compete with other Slurm jobs, you should use a smaller PrioritySpread value.
+* If you have an older Slurm system that limits nice values to 10000, a smaller @PrioritySpread@ can help avoid reaching that limit.
* In other cases, a larger value is beneficial because it reduces the total number of adjustments made by executing @scontrol at .
The smallest usable value is @1@. The default value of @10@ is used if this option is zero or negative. Example:
@@ -91,7 +91,7 @@ The smallest usable value is @1@. The default value of @10@ is used if this opti
<code class="userinput">PrioritySpread: <b>1000</b></code></pre>
</notextile>
-h3(#SbatchArguments). Containers.SLURM.SbatchArgumentsList
+h3(#SbatchArguments). Containers.Slurm.SbatchArgumentsList
When crunch-dispatch-slurm invokes @sbatch@, you can add arguments to the command by specifying @SbatchArguments@. You can use this to send the jobs to specific cluster partitions or add resource requests. Set @SbatchArguments@ to an array of strings. For example:
@@ -105,9 +105,9 @@ When crunch-dispatch-slurm invokes @sbatch@, you can add arguments to the comman
Note: If an argument is supplied multiple times, @slurm@ uses the value of the last occurrence of the argument on the command line. Arguments specified through Arvados are added after the arguments listed in SbatchArguments. This means, for example, an Arvados container that specifies @partitions@ in @scheduling_parameter@ will override an occurrence of @--partition@ in SbatchArguments. As a result, for container parameters that can be specified through Arvados, SbatchArguments can be used to specify defaults but not enforce specific policy.
-h3(#CrunchRunCommand-cgroups). Containers.CrunchRunArgumentList: Dispatch to SLURM cgroups
+h3(#CrunchRunCommand-cgroups). Containers.CrunchRunArgumentList: Dispatch to Slurm cgroups
-If your SLURM cluster uses the @task/cgroup@ TaskPlugin, you can configure Crunch's Docker containers to be dispatched inside SLURM's cgroups. This provides consistent enforcement of resource constraints. To do this, use a crunch-dispatch-slurm configuration like the following:
+If your Slurm cluster uses the @task/cgroup@ TaskPlugin, you can configure Crunch's Docker containers to be dispatched inside Slurm's cgroups. This provides consistent enforcement of resource constraints. To do this, use a crunch-dispatch-slurm configuration like the following:
<notextile>
<pre> Containers:
@@ -116,13 +116,13 @@ If your SLURM cluster uses the @task/cgroup@ TaskPlugin, you can configure Crunc
</pre>
</notextile>
-The choice of subsystem ("memory" in this example) must correspond to one of the resource types enabled in SLURM's @cgroup.conf@. Limits for other resource types will also be respected. The specified subsystem is singled out only to let Crunch determine the name of the cgroup provided by SLURM. When doing this, you should also set "ReserveExtraRAM":#ReserveExtraRAM .
+The choice of subsystem ("memory" in this example) must correspond to one of the resource types enabled in Slurm's @cgroup.conf@. Limits for other resource types will also be respected. The specified subsystem is singled out only to let Crunch determine the name of the cgroup provided by Slurm. When doing this, you should also set "ReserveExtraRAM":#ReserveExtraRAM .
{% include 'notebox_begin' %}
-Some versions of Docker (at least 1.9), when run under systemd, require the cgroup parent to be specified as a systemd slice. This causes an error when specifying a cgroup parent created outside systemd, such as those created by SLURM.
+Some versions of Docker (at least 1.9), when run under systemd, require the cgroup parent to be specified as a systemd slice. This causes an error when specifying a cgroup parent created outside systemd, such as those created by Slurm.
-You can work around this issue by disabling the Docker daemon's systemd integration. This makes it more difficult to manage Docker services with systemd, but Crunch does not require that functionality, and it will be able to use SLURM's cgroups as container parents. To do this, "configure the Docker daemon on all compute nodes":install-compute-node.html#configure_docker_daemon to run with the option @--exec-opt native.cgroupdriver=cgroupfs@.
+You can work around this issue by disabling the Docker daemon's systemd integration. This makes it more difficult to manage Docker services with systemd, but Crunch does not require that functionality, and it will be able to use Slurm's cgroups as container parents. To do this, "configure the Docker daemon on all compute nodes":install-compute-node.html#configure_docker_daemon to run with the option @--exec-opt native.cgroupdriver=cgroupfs@.
{% include 'notebox_end' %}
diff --git a/doc/install/crunch2-slurm/install-prerequisites.html.textile.liquid b/doc/install/crunch2-slurm/install-prerequisites.html.textile.liquid
index 39f1b7258..23bdd3b26 100644
--- a/doc/install/crunch2-slurm/install-prerequisites.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-prerequisites.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: installguide
-title: Containers API SLURM prerequisites
+title: Containers API Slurm prerequisites
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
diff --git a/doc/install/crunch2-slurm/install-slurm.html.textile.liquid b/doc/install/crunch2-slurm/install-slurm.html.textile.liquid
index 7f4488fb3..061edf96c 100644
--- a/doc/install/crunch2-slurm/install-slurm.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-slurm.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: installguide
-title: Set up SLURM
+title: Set up Slurm
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,12 +9,12 @@ Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Containers can be dispatched to a SLURM cluster. The dispatcher sends work to the cluster using SLURM's @sbatch@ command, so it works in a variety of SLURM configurations.
+Containers can be dispatched to a Slurm cluster. The dispatcher sends work to the cluster using Slurm's @sbatch@ command, so it works in a variety of Slurm configurations.
In order to run containers, you must run the dispatcher as a user that has permission to set up FUSE mounts and run Docker containers on each compute node. This install guide refers to this user as the @crunch@ user. We recommend you create this user on each compute node with the same UID and GID, and add it to the @fuse@ and @docker@ system groups to grant it the necessary permissions. However, you can run the dispatcher under any account with sufficient permissions across the cluster.
-On the API server, install SLURM and munge, and generate a munge key.
+On the API server, install Slurm and munge, and generate a munge key.
On Debian-based systems:
@@ -31,7 +31,7 @@ On Red Hat-based systems:
</code></pre>
</notextile>
-Now we need to give SLURM a configuration file. On Debian-based systems, this is installed at @/etc/slurm-llnl/slurm.conf@. On Red Hat-based systems, this is installed at @/etc/slurm/slurm.conf@. Here's an example @slurm.conf@:
+Now we need to give Slurm a configuration file. On Debian-based systems, this is installed at @/etc/slurm-llnl/slurm.conf@. On Red Hat-based systems, this is installed at @/etc/slurm/slurm.conf@. Here's an example @slurm.conf@:
<notextile>
<pre><code>
@@ -82,19 +82,19 @@ PartitionName=compute Nodes=compute[0-255] Default=YES Shared=YES
</code></pre>
</notextile>
-h3. SLURM configuration essentials
+h3. Slurm configuration essentials
Whenever you change this file, you will need to update the copy _on every compute node_ as well as the controller node, and then run @sudo scontrol reconfigure@.
-*@ControlMachine@* should be a DNS name that resolves to the SLURM controller (dispatch/API server). This must resolve correctly on all SLURM worker nodes as well as the controller itself. In general SLURM is very sensitive about all of the nodes being able to communicate with the controller _and one another_, all using the same DNS names.
+*@ControlMachine@* should be a DNS name that resolves to the Slurm controller (dispatch/API server). This must resolve correctly on all Slurm worker nodes as well as the controller itself. In general Slurm is very sensitive about all of the nodes being able to communicate with the controller _and one another_, all using the same DNS names.
*@SelectType=select/linear@* is needed on cloud-based installations that update node sizes dynamically, but it can only schedule one container at a time on each node. On a static or homogeneous cluster, use @SelectType=select/cons_res@ with @SelectTypeParameters=CR_CPU_Memory@ instead to enable node sharing.
*@NodeName=compute[0-255]@* establishes that the hostnames of the worker nodes will be compute0, compute1, etc. through compute255.
* There are several ways to compress sequences of names, like @compute[0-9,80,100-110]@. See the "hostlist" discussion in the @slurm.conf(5)@ and @scontrol(1)@ man pages for more information.
-* It is not necessary for all of the nodes listed here to be alive in order for SLURM to work, although you should make sure the DNS entries exist. It is easiest to define lots of hostnames up front, assigning them to real nodes and updating your DNS records as the nodes appear. This minimizes the frequency of @slurm.conf@ updates and use of @scontrol reconfigure@.
+* It is not necessary for all of the nodes listed here to be alive in order for Slurm to work, although you should make sure the DNS entries exist. It is easiest to define lots of hostnames up front, assigning them to real nodes and updating your DNS records as the nodes appear. This minimizes the frequency of @slurm.conf@ updates and use of @scontrol reconfigure@.
-Each hostname in @slurm.conf@ must also resolve correctly on all SLURM worker nodes as well as the controller itself. Furthermore, the hostnames used in the configuration file must match the hostnames reported by @hostname@ or @hostname -s@ on the nodes themselves. This applies to the ControlMachine as well as the worker nodes.
+Each hostname in @slurm.conf@ must also resolve correctly on all Slurm worker nodes as well as the controller itself. Furthermore, the hostnames used in the configuration file must match the hostnames reported by @hostname@ or @hostname -s@ on the nodes themselves. This applies to the ControlMachine as well as the worker nodes.
For example:
* In @slurm.conf@ on control and worker nodes: @ControlMachine=ClusterID.example.com@
diff --git a/doc/install/crunch2-slurm/install-test.html.textile.liquid b/doc/install/crunch2-slurm/install-test.html.textile.liquid
index 6d4ca9305..647995a8c 100644
--- a/doc/install/crunch2-slurm/install-test.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-test.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: installguide
-title: Test SLURM dispatch
+title: Test Slurm dispatch
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
@@ -10,19 +10,19 @@ SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
{% include 'notebox_begin_warning' %}
-crunch-dispatch-slurm is only relevant for on premise clusters that will spool jobs to Slurm. Skip this section if you are installing a cloud cluster.
+crunch-dispatch-slurm is only relevant for on premises clusters that will spool jobs to Slurm. Skip this section if you are installing a cloud cluster.
{% include 'notebox_end' %}
h2. Test compute node setup
-You should now be able to submit SLURM jobs that run in Docker containers. On the node where you're running the dispatcher, you can test this by running:
+You should now be able to submit Slurm jobs that run in Docker containers. On the node where you're running the dispatcher, you can test this by running:
<notextile>
<pre><code>~$ <span class="userinput">sudo -u <b>crunch</b> srun -N1 docker run busybox echo OK
</code></pre>
</notextile>
-If it works, this command should print @OK@ (it may also show some status messages from SLURM and/or Docker). If it does not print @OK@, double-check your compute node setup, and that the @crunch@ user can submit SLURM jobs.
+If it works, this command should print @OK@ (it may also show some status messages from Slurm and/or Docker). If it does not print @OK@, double-check your compute node setup, and that the @crunch@ user can submit Slurm jobs.
h2. Test the dispatcher
@@ -66,7 +66,7 @@ This command should return a record with a @container_uuid@ field. Once @crunch
</code></pre>
</notextile>
-Before the container finishes, SLURM's @squeue@ command will show the new job in the list of queued and running jobs. For example, you might see:
+Before the container finishes, Slurm's @squeue@ command will show the new job in the list of queued and running jobs. For example, you might see:
<notextile>
<pre><code>~$ <span class="userinput">squeue --long</span>
diff --git a/doc/install/install-manual-prerequisites.html.textile.liquid b/doc/install/install-manual-prerequisites.html.textile.liquid
index 2ce6e36a6..5c974314e 100644
--- a/doc/install/install-manual-prerequisites.html.textile.liquid
+++ b/doc/install/install-manual-prerequisites.html.textile.liquid
@@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Before attempting installation, you should begin by reviewing supported platforms, choosing backends for identity, storage, and scheduling, and decide how you will distribute Arvados services onto machines. You should also choose an Arvados Cluster ID, choose your hostnames, and aquire TLS certificates. It may be helpful to make notes as you go along using one of these worksheets: "New cluster checklist for AWS":new_cluster_checklist_AWS.xlsx - "New cluster checklist for Azure":new_cluster_checklist_Azure.xlsx - "New cluster checklist for on premise SLURM":new_cluster_checklist_slurm.xlsx
+Before attempting installation, you should begin by reviewing supported platforms, choosing backends for identity, storage, and scheduling, and deciding how you will distribute Arvados services onto machines. You should also choose an Arvados Cluster ID, choose your hostnames, and acquire TLS certificates. It may be helpful to make notes as you go along using one of these worksheets: "New cluster checklist for AWS":new_cluster_checklist_AWS.xlsx - "New cluster checklist for Azure":new_cluster_checklist_Azure.xlsx - "New cluster checklist for on premises Slurm":new_cluster_checklist_slurm.xlsx
The Arvados storage subsystem is called "keep". The compute subsystem is called "crunch".
@@ -60,8 +60,8 @@ table(table table-bordered table-condensed).
|"Shell server":install-shell-server.html |Synchronize (create/delete/configure) Unix shell accounts with Arvados users.|Optional.|
|"Git server":install-arv-git-httpd.html |Arvados-hosted git repositories, with Arvados-token based authentication.|Optional, but required by Workflow Composer.|
|\3=. *Crunch (running containers)*|
-|"crunch-dispatch-slurm":crunch2-slurm/install-prerequisites.html |Run analysis workflows using Docker containers distributed across a SLURM cluster.|Optional if you wish to use Arvados for data management only.|
-|"Node Manager":install-nodemanager.html, "arvados-dispatch-cloud":install-dispatch-cloud.html |Allocate and free cloud VM instances on demand based on workload.|Optional, not needed for a static SLURM cluster (such as on-premise HPC).|
+|"crunch-dispatch-slurm":crunch2-slurm/install-prerequisites.html |Run analysis workflows using Docker containers distributed across a Slurm cluster.|Optional if you wish to use Arvados for data management only.|
+|"Node Manager":install-nodemanager.html, "arvados-dispatch-cloud":install-dispatch-cloud.html |Allocate and free cloud VM instances on demand based on workload.|Optional, not needed for a static Slurm cluster (such as on-premise HPC).|
h2(#identity). Identity provider
diff --git a/doc/install/install-nodemanager.html.textile.liquid b/doc/install/install-nodemanager.html.textile.liquid
index 431fc10b8..75e4b2513 100644
--- a/doc/install/install-nodemanager.html.textile.liquid
+++ b/doc/install/install-nodemanager.html.textile.liquid
@@ -9,13 +9,13 @@ Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Arvados Node Manager provides elastic computing for Arvados and SLURM by creating and destroying virtual machines on demand. Node Manager currently supports Amazon Web Services (AWS), Google Cloud Platform (GCP) and Microsoft Azure.
+Arvados Node Manager provides elastic computing for Arvados and Slurm by creating and destroying virtual machines on demand. Node Manager currently supports Amazon Web Services (AWS), Google Cloud Platform (GCP) and Microsoft Azure.
Note: node manager is only required for elastic computing cloud environments. Fixed size clusters (such as on-premise HPC) do not require node manager.
h2. Install
-Node manager may run anywhere, however it must be able to communicate with the cloud provider's APIs, and use the command line tools @sinfo@, @squeue@ and @scontrol@ to communicate with the cluster's SLURM controller.
+Node manager may run anywhere, however it must be able to communicate with the cloud provider's APIs, and use the command line tools @sinfo@, @squeue@ and @scontrol@ to communicate with the cluster's Slurm controller.
On Debian-based systems:
@@ -65,8 +65,8 @@ h3(#aws). Amazon Web Services
[Daemon]
# The dispatcher can customize the start and stop procedure for
-# cloud nodes. For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
+# cloud nodes. For example, the Slurm dispatcher drains nodes
+# through Slurm before shutting them down.
dispatcher = slurm
# Node Manager will ensure that there are at least this many nodes running at
@@ -440,8 +440,8 @@ h3(#azure). Microsoft Azure
[Daemon]
# The dispatcher can customize the start and stop procedure for
-# cloud nodes. For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
+# cloud nodes. For example, the Slurm dispatcher drains nodes
+# through Slurm before shutting them down.
dispatcher = slurm
# Node Manager will ensure that there are at least this many nodes running at
diff --git a/tools/compute-images/.gitignore b/tools/compute-images/.gitignore
new file mode 100644
index 000000000..68fc77534
--- /dev/null
+++ b/tools/compute-images/.gitignore
@@ -0,0 +1,3 @@
+*pem
+secrets/*
+keypairs/*
diff --git a/tools/compute-images/.licenseignore b/tools/compute-images/.licenseignore
new file mode 100644
index 000000000..6288dbbc8
--- /dev/null
+++ b/tools/compute-images/.licenseignore
@@ -0,0 +1,5 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+*.json
+1078ECD7.asc
diff --git a/tools/compute-images/1078ECD7.asc b/tools/compute-images/1078ECD7.asc
new file mode 100644
index 000000000..edc62f48f
--- /dev/null
+++ b/tools/compute-images/1078ECD7.asc
@@ -0,0 +1,30 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA
+ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE
+a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg
+ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2
+NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA
+wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j
+IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J
+ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW
+WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE
+TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx
+IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27
+HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm
+QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d
+Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7
+wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY
+/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3
+b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs
+NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S
+OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866
+8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1
+3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm
+kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX
+DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/
+ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1
+2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr
+yPet6wIKrtLGhAqZ
+=CLkG
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/tools/compute-images/arvados-images-aws.json b/tools/compute-images/arvados-images-aws.json
new file mode 100644
index 000000000..4832208e4
--- /dev/null
+++ b/tools/compute-images/arvados-images-aws.json
@@ -0,0 +1,80 @@
+{
+ "variables": {
+ "aws_access_key": "",
+ "aws_secret_key": "",
+ "aws_profile": "",
+ "build_environment": "aws",
+ "arvados_cluster": "",
+ "aws_source_ami": "ami-04d70e069399af2e9",
+ "fqdn": "",
+ "ssh_user": "admin",
+ "vpc_id": "",
+ "subnet_id": "",
+ "public_key_file": "",
+ "associate_public_ip_address": "true"
+ },
+ "builders": [{
+ "type": "amazon-ebs",
+ "profile": "{{ user `aws_profile`}}",
+ "access_key": "{{user `aws_access_key`}}",
+ "secret_key": "{{user `aws_secret_key`}}",
+ "region": "{{user `aws_default_region`}}",
+ "ena_support": "true",
+ "source_ami": "{{user `aws_source_ami`}}",
+ "instance_type": "m4.large",
+ "vpc_id": "{{user `vpc_id`}}",
+ "subnet_id": "{{user `subnet_id`}}",
+ "associate_public_ip_address": "{{user `associate_public_ip_address`}}",
+ "ssh_username": "{{user `ssh_user`}}",
+ "ami_name": "arvados-{{user `arvados_cluster`}}-compute-{{isotime \"20060102150405\"}}",
+ "ami_block_device_mappings": [
+ {
+ "device_name": "/dev/xvdb",
+ "encrypted": true,
+ "virtual_name": "ephemeral0"
+ },
+ {
+ "device_name": "/dev/xvdc",
+ "encrypted": true,
+ "virtual_name": "ephemeral1"
+ }
+ ],
+ "tags": {
+ "Name": "arvados-{{user `arvados_cluster`}}-compute",
+ "creation_date": "{{isotime \"20060102150405\"}}",
+ "packer": "true"
+ },
+ "run_tags": {
+ "Name": "packer-arvados-{{user `arvados_cluster`}}-compute-builder",
+ "creation_date": "{{isotime \"20060102150405\"}}",
+ "environment": "development"
+ },
+ "run_volume_tags": {
+ "Name": "packer-arvados-{{user `arvados_cluster`}}-compute-builder",
+ "creation_date": "{{isotime \"20060102150405\"}}",
+ "environment": "development"
+ }
+ }],
+ "provisioners": [{
+ "type": "file",
+ "source": "1078ECD7.asc",
+ "destination": "/tmp/1078ECD7.asc"
+ },{
+ "type": "file",
+ "source": "scripts/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg",
+ "destination": "/tmp/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg"
+ },{
+ "type": "file",
+ "source": "scripts/usr-local-bin-ensure-encrypted-partitions.sh",
+ "destination": "/tmp/usr-local-bin-ensure-encrypted-partitions.sh"
+ },{
+ "type": "file",
+ "source": "{{user `public_key_file`}}",
+ "destination": "/tmp/crunch-authorized_keys"
+ },{
+ "type": "shell",
+ "execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'",
+ "script": "scripts/base.sh",
+ "environment_vars": ["ROLE=compute","RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}"]
+ }]
+}
diff --git a/tools/compute-images/arvados-images-azure.json b/tools/compute-images/arvados-images-azure.json
new file mode 100644
index 000000000..3a80f9c12
--- /dev/null
+++ b/tools/compute-images/arvados-images-azure.json
@@ -0,0 +1,75 @@
+{
+ "variables": {
+ "storage_account": null,
+ "resource_group": null,
+ "client_id": "{{env `ARM_CLIENT_ID`}}",
+ "client_secret": "{{env `ARM_CLIENT_SECRET`}}",
+ "subscription_id": "{{env `ARM_SUBSCRIPTION_ID`}}",
+ "tenant_id": "{{env `ARM_TENANT_ID`}}",
+ "build_environment": "azure-arm",
+ "cloud_environment_name": "Public",
+ "location": "centralus",
+ "ssh_user": "packer",
+ "ssh_private_key_file": "{{env `PACKERPRIVKEY`}}",
+ "image_sku": "",
+ "arvados_cluster": "",
+ "project_id": "",
+ "account_file": "",
+ "fqdn": "",
+ "resolver": "",
+ "reposuffix": "",
+ "public_key_file": ""
+ },
+ "builders": [
+ {
+ "type": "azure-arm",
+ "cloud_environment_name": "{{user `cloud_environment_name`}}",
+
+ "client_id": "{{user `client_id`}}",
+ "client_secret": "{{user `client_secret`}}",
+ "subscription_id": "{{user `subscription_id`}}",
+ "tenant_id": "{{user `tenant_id`}}",
+
+ "resource_group_name": "{{user `resource_group`}}",
+ "storage_account": "{{user `storage_account`}}",
+
+ "capture_container_name": "images",
+ "capture_name_prefix": "{{user `arvados_cluster`}}-compute",
+
+ "ssh_username": "{{user `ssh_user`}}",
+ "ssh_private_key_file": "{{user `ssh_private_key_file`}}",
+
+ "image_publisher": "Canonical",
+ "image_offer": "UbuntuServer",
+ "image_sku": "{{user `image_sku`}}",
+
+ "os_type": "Linux",
+
+ "location": "{{user `location`}}",
+ "vm_size": "Standard_D1_v2"
+ }
+ ],
+
+ "provisioners": [{
+ "type": "file",
+ "source": "1078ECD7.asc",
+ "destination": "/tmp/1078ECD7.asc"
+ },{
+ "type": "file",
+ "source": "scripts/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg",
+ "destination": "/tmp/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg"
+ },{
+ "type": "file",
+ "source": "scripts/usr-local-bin-ensure-encrypted-partitions.sh",
+ "destination": "/tmp/usr-local-bin-ensure-encrypted-partitions.sh"
+ },{
+ "type": "file",
+ "source": "{{user `public_key_file`}}",
+ "destination": "/tmp/crunch-authorized_keys"
+ },{
+ "type": "shell",
+ "execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'",
+ "script": "scripts/base.sh",
+ "environment_vars": ["ROLE=compute","RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}"]
+ }]
+}
diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
new file mode 100755
index 000000000..e8265ae19
--- /dev/null
+++ b/tools/compute-images/build.sh
@@ -0,0 +1,277 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+JSON_FILE=$1
+ARVADOS_CLUSTER=$2
+PROJECT_ID=$3
+ACCOUNT_FILE=$4
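+# Note: the positional assignments above are superseded by the getopt-based option
+# parsing below. JSON_FILE is reset to empty before parsing, and ARVADOS_CLUSTER,
+# PROJECT_ID and ACCOUNT_FILE are not referenced again in this script.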
+
+read -rd "\000" helpmessage <<EOF
+$(basename $0): Build cloud images for arvados-dispatch-cloud
+
+Syntax:
+ $(basename $0) [options]
+
+Options:
+
+ --json-file (required)
+ Path to the packer json file
+ --arvados-cluster-id (required)
+ The ID of the Arvados cluster, e.g. zzzzz
+ --aws-profile (default: false)
+ AWS profile to use (valid profile from ~/.aws/config)
+ --aws-secrets-file (default: false, required if building for AWS)
+ AWS secrets file which will be sourced from this script
+ --aws-source-ami (default: false, required if building for AWS)
+ The AMI to use as base for building the images
+ --aws-region (default: us-east-1)
+ The AWS region to use for building the images
+ --aws-vpc-id (optional)
+ VPC id for AWS, otherwise packer will pick the default one
+ --aws-subnet-id
+ Subnet id for AWS, otherwise packer will pick the default one for the VPC
+ --gcp-project-id (default: false, required if building for GCP)
+ GCP project id
+ --gcp-account-file (default: false, required if building for GCP)
+ GCP account file
+ --gcp-zone (default: us-central1-f)
+ GCP zone
+ --azure-secrets-file (default: false, required if building for Azure)
+ Azure secrets file which will be sourced from this script
+ --azure-resource-group (default: false, required if building for Azure)
+ Azure resource group
+ --azure-storage-account (default: false, required if building for Azure)
+ Azure storage account
+ --azure-location (default: false, required if building for Azure)
+ Azure location, e.g. centralus, eastus, westeurope
+ --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
+ Azure SKU image to use
+ --ssh_user (default: packer)
+ The user packer will use to log into the image
+ --domain (default: arvadosapi.com)
+ The domain part of the FQDN for the cluster
+ --resolver (default: 8.8.8.8)
+ The dns resolver for the machine
+ --reposuffix (default: unset)
+ Set this to "-dev" to track the unstable/dev Arvados repositories
+ --public-key-file (required)
+ Path to the public key file that a-d-c will use to log into the compute node
+ --debug
+ Output debug information (default: false)
+
+EOF
+
+JSON_FILE=
+ARVADOS_CLUSTER_ID=
+AWS_PROFILE=
+AWS_SECRETS_FILE=
+AWS_SOURCE_AMI=
+AWS_VPC_ID=
+AWS_SUBNET_ID=
+GCP_PROJECT_ID=
+GCP_ACCOUNT_FILE=
+GCP_ZONE=
+AZURE_SECRETS_FILE=
+AZURE_RESOURCE_GROUP=
+AZURE_STORAGE_ACCOUNT=
+AZURE_LOCATION=
+AZURE_CLOUD_ENVIRONMENT=
+DEBUG=
+SSH_USER=
+DOMAIN="arvadosapi.com"
+AWS_DEFAULT_REGION=us-east-1
+PUBLIC_KEY_FILE=
+
+PARSEDOPTS=$(getopt --name "$0" --longoptions \
+ help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-storage-account:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
+ -- "" "$@")
+if [ $? -ne 0 ]; then
+ exit 1
+fi
+
+eval set -- "$PARSEDOPTS"
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --help)
+ echo >&2 "$helpmessage"
+ echo >&2
+ exit 1
+ ;;
+ --json-file)
+ JSON_FILE="$2"; shift
+ ;;
+ --arvados-cluster-id)
+ ARVADOS_CLUSTER_ID="$2"; shift
+ ;;
+ --aws-source-ami)
+ AWS_SOURCE_AMI="$2"; shift
+ ;;
+ --aws-profile)
+ AWS_PROFILE="$2"; shift
+ ;;
+ --aws-secrets-file)
+ AWS_SECRETS_FILE="$2"; shift
+ ;;
+ --aws-region)
+ AWS_DEFAULT_REGION="$2"; shift
+ ;;
+ --aws-vpc-id)
+ AWS_VPC_ID="$2"; shift
+ ;;
+ --aws-subnet-id)
+ AWS_SUBNET_ID="$2"; shift
+ ;;
+ --gcp-project-id)
+ GCP_PROJECT_ID="$2"; shift
+ ;;
+ --gcp-account-file)
+ GCP_ACCOUNT_FILE="$2"; shift
+ ;;
+ --gcp-zone)
+ GCP_ZONE="$2"; shift
+ ;;
+ --azure-secrets-file)
+ AZURE_SECRETS_FILE="$2"; shift
+ ;;
+ --azure-resource-group)
+ AZURE_RESOURCE_GROUP="$2"; shift
+ ;;
+ --azure-storage-account)
+ AZURE_STORAGE_ACCOUNT="$2"; shift
+ ;;
+ --azure-location)
+ AZURE_LOCATION="$2"; shift
+ ;;
+ --azure-sku)
+ AZURE_SKU="$2"; shift
+ ;;
+ --azure-cloud-environment)
+ AZURE_CLOUD_ENVIRONMENT="$2"; shift
+ ;;
+ --ssh_user)
+ SSH_USER="$2"; shift
+ ;;
+ --domain)
+ DOMAIN="$2"; shift
+ ;;
+ --resolver)
+ RESOLVER="$2"; shift
+ ;;
+ --reposuffix)
+ REPOSUFFIX="$2"; shift
+ ;;
+ --public-key-file)
+ PUBLIC_KEY_FILE="$2"; shift
+ ;;
+ --debug)
+ # If you want to debug a build issue, add the -debug flag to the build
+ # command in question.
+ # This will allow you to ssh in, if you use the .pem file that packer
+ # generates in this directory as the ssh key. The base image uses the admin
+ # user and ssh port 22.
+ EXTRA=" -debug"
+ ;;
+ --)
+ if [ $# -gt 1 ]; then
+ echo >&2 "$0: unrecognized argument '$2'. Try: $0 --help"
+ exit 1
+ fi
+ ;;
+ esac
+ shift
+done
+
+
+if [[ "$JSON_FILE" == "" ]] || [[ ! -f "$JSON_FILE" ]]; then
+ echo >&2 "$helpmessage"
+ echo >&2
+ echo >&2 "ERROR: packer json file not found"
+ echo >&2
+ exit 1
+fi
+
+if [[ -z "$ARVADOS_CLUSTER_ID" ]]; then
+ echo >&2 "$helpmessage"
+ echo >&2
+ echo >&2 "ERROR: arvados cluster id not specified"
+ echo >&2
+ exit 1
+fi
+
+if [[ "$PUBLIC_KEY_FILE" == "" ]] || [[ ! -f "$PUBLIC_KEY_FILE" ]]; then
+ echo >&2 "$helpmessage"
+ echo >&2
+ echo >&2 "ERROR: public key file not found"
+ echo >&2
+ exit 1
+fi
+
+if [[ ! -z "$AWS_SECRETS_FILE" ]]; then
+ source $AWS_SECRETS_FILE
+fi
+
+if [[ ! -z "$AZURE_SECRETS_FILE" ]]; then
+ source $AZURE_SECRETS_FILE
+fi
+
+FQDN=" -var fqdn=compute.$ARVADOS_CLUSTER_ID.$DOMAIN ";
+
+EXTRA2=""
+
+if [[ "$AWS_SOURCE_AMI" != "" ]]; then
+ EXTRA2+=" -var aws_source_ami=$AWS_SOURCE_AMI"
+fi
+if [[ "$AWS_PROFILE" != "" ]]; then
+ EXTRA2+=" -var aws_profile=$AWS_PROFILE"
+fi
+if [[ "$AWS_VPC_ID" != "" ]]; then
+ EXTRA2+=" -var vpc_id=$AWS_VPC_ID -var associate_public_ip_address=true "
+fi
+if [[ "$AWS_SUBNET_ID" != "" ]]; then
+ EXTRA2+=" -var subnet_id=$AWS_SUBNET_ID -var associate_public_ip_address=true "
+fi
+if [[ "$AWS_DEFAULT_REGION" != "" ]]; then
+ EXTRA2+=" -var aws_default_region=$AWS_DEFAULT_REGION"
+fi
+if [[ "$GCP_PROJECT_ID" != "" ]]; then
+ EXTRA2+=" -var project_id=$GCP_PROJECT_ID"
+fi
+if [[ "$GCP_ACCOUNT_FILE" != "" ]]; then
+ EXTRA2+=" -var account_file=$GCP_ACCOUNT_FILE"
+fi
+if [[ "$GCP_ZONE" != "" ]]; then
+ EXTRA2+=" -var zone=$GCP_ZONE"
+fi
+if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
+ EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
+fi
+if [[ "$AZURE_STORAGE_ACCOUNT" != "" ]]; then
+ EXTRA2+=" -var storage_account=$AZURE_STORAGE_ACCOUNT"
+fi
+if [[ "$AZURE_LOCATION" != "" ]]; then
+ EXTRA2+=" -var location=$AZURE_LOCATION"
+fi
+if [[ "$AZURE_SKU" != "" ]]; then
+ EXTRA2+=" -var image_sku=$AZURE_SKU"
+fi
+if [[ "$AZURE_CLOUD_ENVIRONMENT" != "" ]]; then
+ EXTRA2+=" -var cloud_environment_name=$AZURE_CLOUD_ENVIRONMENT"
+fi
+if [[ "$SSH_USER" != "" ]]; then
+ EXTRA2+=" -var ssh_user=$SSH_USER"
+fi
+if [[ "$RESOLVER" != "" ]]; then
+ EXTRA2+=" -var resolver=$RESOLVER"
+fi
+if [[ "$REPOSUFFIX" != "" ]]; then
+ EXTRA2+=" -var reposuffix=$REPOSUFFIX"
+fi
+if [[ "$PUBLIC_KEY_FILE" != "" ]]; then
+ EXTRA2+=" -var public_key_file=$PUBLIC_KEY_FILE"
+fi
+
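+# Note: $role is never assigned in this script, so the "role" var below is passed
+# to packer as an empty string.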
+echo packer build$EXTRA$FQDN -var "role=$role" -var "arvados_cluster=$ARVADOS_CLUSTER_ID"$EXTRA2 $JSON_FILE
+packer build$EXTRA$FQDN -var "role=$role" -var "arvados_cluster=$ARVADOS_CLUSTER_ID"$EXTRA2 $JSON_FILE
diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh
new file mode 100644
index 000000000..4b134b9db
--- /dev/null
+++ b/tools/compute-images/scripts/base.sh
@@ -0,0 +1,110 @@
+#!/bin/bash -euxo pipefail
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+SUDO=sudo
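+# Note: the packer templates run this script via "sudo -S env ... /bin/bash" (see
+# execute_command in the json templates), so it already runs as root there; the
+# $SUDO prefix mainly matters when running this script by hand.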
+
+# Run apt-get update
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get --yes update
+
+# Install gnupg and dirmgr or gpg key checks will fail
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install \
+ gnupg \
+ dirmngr \
+ lsb-release
+
+# For good measure, apt-get upgrade
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes upgrade
+
+# Make sure cloud-init is installed
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install cloud-init
+if [[ ! -d /var/lib/cloud/scripts/per-boot ]]; then
+ mkdir -p /var/lib/cloud/scripts/per-boot
+fi
+
+TMP_LSB=`/usr/bin/lsb_release -c -s`
+LSB_RELEASE_CODENAME=${TMP_LSB//[$'\t\r\n ']}
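+# (the parameter expansion above strips any whitespace from the lsb_release output)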
+
+# Add the arvados apt repository
+echo "# apt.arvados.org" |$SUDO tee --append /etc/apt/sources.list.d/apt.arvados.org.list
+echo "deb http://apt.arvados.org/ $LSB_RELEASE_CODENAME${REPOSUFFIX} main" |$SUDO tee --append /etc/apt/sources.list.d/apt.arvados.org.list
+
+# Add the arvados signing key
+cat /tmp/1078ECD7.asc | $SUDO apt-key add -
+# Add the debian keys
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get install --yes debian-keyring debian-archive-keyring
+
+# Fix locale
+$SUDO /bin/sed -ri 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen
+$SUDO /usr/sbin/locale-gen
+
+# Install some packages we always need
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get --yes update
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install \
+ openssh-server \
+ apt-utils \
+ git \
+ curl \
+ libcurl3-gnutls \
+ libcurl4-openssl-dev \
+ lvm2 \
+ cryptsetup \
+ xfsprogs
+
+# See if python3-distutils is installable, and if so install it. This is a
+# temporary workaround for an Arvados packaging bug and should be removed once
+# Arvados 2.0.4 or 2.1.0 is released, whichever comes first.
+# See https://dev.arvados.org/issues/16611 for more information
+if apt-cache -qq show python3-distutils >/dev/null 2>&1; then
+ $SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install python3-distutils
+fi
+
+# Install the Arvados packages we need
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install \
+ python-arvados-fuse \
+ crunch-run \
+ arvados-docker-cleaner \
+ docker.io
+
+# Remove unattended-upgrades if it is installed
+$SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes remove unattended-upgrades --purge
+
+# Configure arvados-docker-cleaner
+$SUDO mkdir -p /etc/arvados/docker-cleaner
+echo -e "{\n \"Quota\": \"10G\",\n \"RemoveStoppedContainers\": \"always\"\n}" | $SUDO tee /etc/arvados/docker-cleaner/docker-cleaner.json
+
+# Enable cgroup accounting
+$SUDO sed -i 's/GRUB_CMDLINE_LINUX=""/GRUB_CMDLINE_LINUX="cgroup_enable=memory swapaccount=1"/g' /etc/default/grub
+$SUDO update-grub
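+# The updated kernel command line takes effect on the first boot of instances
+# created from the finished image.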
+
+# Set a higher ulimit for docker
+$SUDO sed -i "s/ExecStart=\(.*\)/ExecStart=\1 --default-ulimit nofile=10000:10000 --dns ${RESOLVER}/g" /lib/systemd/system/docker.service
+$SUDO systemctl daemon-reload
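+# With a hypothetical RESOLVER of 10.1.2.3, ExecStart now ends in
+# "--default-ulimit nofile=10000:10000 --dns 10.1.2.3".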
+
+# Make sure user_allow_other is set in fuse.conf
+$SUDO sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf
+
+# Add crunch user with sudo powers
+$SUDO adduser --disabled-password --gecos "Crunch user,,,," crunch
+# Do not require a password to sudo
+echo -e "# for the crunch user\ncrunch ALL=(ALL) NOPASSWD:ALL" | $SUDO tee /etc/sudoers.d/91-crunch
+
+# Set up the ssh public key for the crunch user
+$SUDO mkdir /home/crunch/.ssh
+$SUDO mv /tmp/crunch-authorized_keys /home/crunch/.ssh/authorized_keys
+$SUDO chown -R crunch:crunch /home/crunch/.ssh
+$SUDO chmod 600 /home/crunch/.ssh/authorized_keys
+$SUDO chmod 700 /home/crunch/.ssh/
+
+# Make sure we resolve via the provided resolver IP. Prepending is good enough because
+# unless 'rotate' is set, the nameservers are queried in order (cf. man resolv.conf)
+$SUDO sed -i "s/#prepend domain-name-servers 127.0.0.1;/prepend domain-name-servers ${RESOLVER};/" /etc/dhcp/dhclient.conf
+
+# Set up the cloud-init script that will ensure encrypted disks
+$SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions.sh /usr/local/bin/ensure-encrypted-partitions.sh
+$SUDO chmod 755 /usr/local/bin/ensure-encrypted-partitions.sh
+$SUDO chown root:root /usr/local/bin/ensure-encrypted-partitions.sh
+$SUDO mv /tmp/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg /etc/cloud/cloud.cfg.d/07_compute_puppetless_arvados_dispatch_cloud.cfg
+$SUDO chown root:root /etc/cloud/cloud.cfg.d/07_compute_puppetless_arvados_dispatch_cloud.cfg
diff --git a/tools/compute-images/scripts/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg b/tools/compute-images/scripts/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg
new file mode 100644
index 000000000..febeda372
--- /dev/null
+++ b/tools/compute-images/scripts/etc-cloud-cloud.cfg.d-07_compute_puppetless_arvados_dispatch_cloud.cfg
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+runcmd:
+ - /bin/echo "cloudinit runcmd starting" | /usr/bin/logger
+ - /usr/local/bin/ensure-encrypted-partitions.sh
+ - /bin/echo "cloudinit runcmd ensure-encrypted-partitions.sh done" | /usr/bin/logger
+ - /bin/echo "cloudinit runcmd finished" | /usr/bin/logger
+ - /bin/touch /arvados-compute-node-boot.complete
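+# The marker file above gives an easy way to check that cloud-init finished,
+# e.g. from a boot probe run by arvados-dispatch-cloud (assumption: your
+# BootProbeCommand is configured to test for it).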
diff --git a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh
new file mode 100644
index 000000000..b24e437f0
--- /dev/null
+++ b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+set -x
+
+VGNAME=compute
+LVNAME=tmp
+LVPATH="/dev/mapper/${VGNAME}-${LVNAME}"
+CRYPTPATH=/dev/mapper/tmp
+MOUNTPATH=/tmp
+
+findmntq() {
+ findmnt "$@" >/dev/null
+}
+
+ensure_umount() {
+ if findmntq "$1"; then
+ umount "$1"
+ fi
+}
+
+if findmntq --source "$CRYPTPATH" --target "$MOUNTPATH"; then
+ exit 0
+fi
+
+CLOUD_SERVER=""
+while [[ ! "$CLOUD_SERVER" ]]; do
+ CLOUD_SERVER="$(curl --silent --head http://169.254.169.254/ \
+ | awk '($1 == "Server:"){sub("\\r+$", ""); print substr($0, 9)}')"
+done
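+# Each cloud's link-local metadata service answers with a distinctive Server:
+# header; the case statement below maps it to that cloud's disk naming scheme.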
+
+DISK_PATTERN=""
+case "$CLOUD_SERVER" in
+ # EC2
+ EC2ws) DISK_PATTERN=/dev/xvd ;;
+ # GCP
+ "Metadata Server for VM") DISK_PATTERN=/dev/sd ;;
+ # Azure
+ Microsoft-IIS/*) DISK_PATTERN=/dev/sd ;;
+esac
+
+if [[ -z "$DISK_PATTERN" ]]; then
+ echo "ensure-encrypted-partitions: Unknown disk configuration; can't run." >&2
+ exit 3
+fi
+
+declare -a LVM_DEVS=()
+
+ROOT_PARTITION=`findmnt / -f -o source -n`
+if [[ "$ROOT_PARTITION" =~ ^\/dev\/nvme ]]; then
+ # e.g. /dev/nvme0n1p1, strip last 4 characters
+ ROOT_DEVICE_STRING=${ROOT_PARTITION%????}
+else
+ # e.g. /dev/xvda1, strip the digits to get the underlying device
+ ROOT_DEVICE_STRING=${ROOT_PARTITION//[0-9]/}
+fi
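+# ROOT_DEVICE_STRING is used below to skip the boot disk and any of its
+# partitions when collecting scratch disks.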
+
+# Newer AWS node types use another pattern, /dev/nvmeXn1 for fast instance SSD disks
+if [[ "$CLOUD_SERVER" == "EC2ws" ]]; then
+ for dev in `ls /dev/nvme* 2>/dev/null`; do
+ if [[ "$dev" == "$ROOT_PARTITION" ]] || [[ "$dev" =~ ^$ROOT_DEVICE_STRING ]]; then
+ continue
+ fi
+ if [[ -e ${dev}n1 ]]; then
+ ensure_umount "${dev}n1"
+ # These instance-store namespaces are whole disks, never partitions, so
+ # always zero the first sector to clear any stale metadata.
+ dd if=/dev/zero of="${dev}n1" bs=512 count=1
+ LVM_DEVS+=("${dev}n1")
+ fi
+ done
+fi
+
+# Look for traditional disks but only if we're not on AWS or if we haven't found
+# a fast instance /dev/nvmeXn1 disk
+if [[ "$CLOUD_SERVER" != "EC2ws" ]] || [[ ${#LVM_DEVS[@]} -eq 0 ]]; then
+ for dev in `ls $DISK_PATTERN* 2>/dev/null`; do
+ # On Azure, we are dealing with /dev/sdb1, on GCP, /dev/sdb, on AWS, /dev/xvdb
+ if [[ "$dev" == "$ROOT_PARTITION" ]] || [[ "$dev" =~ ^$ROOT_DEVICE_STRING ]]; then
+ continue
+ fi
+ if [[ ! "$dev" =~ [a-z]$ ]]; then
+ continue
+ fi
+ if [[ -e ${dev}1 ]]; then
+ dev=${dev}1
+ devtype=partition
+ else
+ devtype=disk
+ fi
+ ensure_umount "$dev"
+ if [[ "$devtype" = disk ]]; then
+ dd if=/dev/zero of="$dev" bs=512 count=1
+ fi
+ LVM_DEVS+=("$dev")
+ done
+fi
+
+if [[ "${#LVM_DEVS[@]}" -eq 0 ]]; then
+ echo "ensure-encrypted-partitions: No extra disks found." >&2
+ exit 4
+fi
+
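+# Pool all discovered scratch disks into one volume group and carve out a
+# single logical volume spanning all free space.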
+vgcreate --force --yes "$VGNAME" "${LVM_DEVS[@]}"
+lvcreate --extents 100%FREE --name "$LVNAME" "$VGNAME"
+
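+# Encrypt the volume with a random one-time key. The key file is shredded
+# right after luksOpen, so the data on the scratch volume is unrecoverable
+# once the node is rebooted or shut down.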
+KEYPATH="$(mktemp -p /var/tmp key-XXXXXXXX.tmp)"
+# -a is needed to load several modules in one call; without it the extra
+# names would be treated as parameters to dm_mod.
+modprobe -a dm_mod aes sha256
+head -c321 /dev/urandom >"$KEYPATH"
+echo YES | cryptsetup luksFormat "$LVPATH" "$KEYPATH"
+cryptsetup --key-file "$KEYPATH" luksOpen "$LVPATH" "$(basename "$CRYPTPATH")"
+shred -u "$KEYPATH"
+mkfs.xfs "$CRYPTPATH"
+
+# First make sure docker is not using /tmp, then unmount everything under it.
+if [ -d /etc/sv/docker.io ]
+then
+ sv stop docker.io || service docker.io stop || true
+else
+ service docker stop || true
+fi
+
+ensure_umount "$MOUNTPATH/docker/aufs"
+
+MOUNTOPTIONS="async"
+mount -o ${MOUNTOPTIONS} "$CRYPTPATH" "$MOUNTPATH"
+chmod a+w,+t "$MOUNTPATH"
+
+# restart docker
+if [ -d /etc/sv/docker.io ]
+then
+ ## runit
+ sv up docker.io
+else
+ service docker start
+fi
+
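+# Give the Docker daemon up to 60 seconds to come back up before declaring
+# failure.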
+end=$((SECONDS+60))
+
+while [ $SECONDS -lt $end ]; do
+ if /usr/bin/docker ps -q >/dev/null; then
+ exit 0
+ fi
+ sleep 1
+done
+
+# Docker didn't start within a minute, abort
+exit 1
-----------------------------------------------------------------------
hooks/post-receive