[ARVADOS] updated: 2.3.2-37-gb93039270
Git user
git at public.arvados.org
Tue Feb 22 19:04:57 UTC 2022
Summary of changes:
.../install-compute-node.html.textile.liquid | 120 +++++++++++++++++----
.../install-dispatch-cloud.html.textile.liquid | 26 +++++
lib/cloud/ec2/ec2.go | 21 ++--
lib/config/config.default.yml | 3 +
tools/compute-images/arvados-images-aws.json | 13 ++-
tools/compute-images/build.sh | 52 +++++----
tools/compute-images/scripts/base.sh | 26 ++++-
.../scripts/create-ebs-volume-nvme.patch | 49 +++++++++
...nsure-encrypted-partitions-aws-ebs-autoscale.sh | 60 +++++++++++
9 files changed, 315 insertions(+), 55 deletions(-)
create mode 100644 tools/compute-images/scripts/create-ebs-volume-nvme.patch
create mode 100644 tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
via b9303927025a662811741a924479c88a2617cf55 (commit)
via 46ea1156952f4ecd9ef939c406af08b3cfe66795 (commit)
via 4ca18c163d557d14c27abecf86bb637d8009d36d (commit)
via 13067dd6ee45bdb57c87da7311e70b42d8094323 (commit)
via c7a97e3ae140c48331a27817acc9929a07aa515a (commit)
via 28ffc3ab52c5ae7adacf5c7906082d7aa0b6dc54 (commit)
from 0677b9ccf4b9961baf982469ab20d8fc49022195 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email, so we list those
revisions in full below.
commit b9303927025a662811741a924479c88a2617cf55
Author: Ward Vandewege <ward at curii.com>
Date: Tue Feb 22 13:52:36 2022 -0500
18772: a few more improvements for the autoscaler script, from the
review comments.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/tools/compute-images/scripts/create-ebs-volume-nvme.patch b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
index b6ef81148..79ce487d5 100644
--- a/tools/compute-images/scripts/create-ebs-volume-nvme.patch
+++ b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
@@ -5,9 +5,9 @@
Make the create-ebs-volume script work with nvme devices.
diff --git a/bin/create-ebs-volume b/bin/create-ebs-volume
-index 6857564..efeac35 100755
---- a/create-ebs-volume
-+++ b/create-ebs-volume
+index 6857564..e3122fa 100755
+--- a/bin/create-ebs-volume
++++ b/bin/create-ebs-volume
@@ -149,10 +149,11 @@ function get_next_logical_device() {
for letter in ${alphabet[@]}; do
# use /dev/xvdb* device names to avoid contention for /dev/sd* and /dev/xvda names
@@ -22,11 +22,9 @@ index 6857564..efeac35 100755
done
}
-@@ -322,13 +323,21 @@ function create_and_attach_volume() {
- set -e
+@@ -323,8 +324,13 @@ function create_and_attach_volume() {
logthis "waiting for volume $volume_id on filesystem"
-+ set +e
while true; do
- if [ -e "$device" ]; then
- logthis "volume $volume_id on filesystem as $device"
@@ -34,19 +32,13 @@ index 6857564..efeac35 100755
+ valid_volume_id=`echo $volume_id |sed -e 's/[^a-zA-Z0-9]//'`
+ # example lsblk output:
+ # nvme4n1 259:7 0 150G 0 disk vol00338247831716a7b
-+ LSBLK=`lsblk -o +SERIAL |grep $valid_volume_id`
-+ if [[ $? -eq 0 ]]; then
-+ nvme_device=`echo $LSBLK|cut -f1 -d' '|xargs -I {} echo "/dev/{}"`
++ if LSBLK=`lsblk -o NAME,SERIAL |grep $valid_volume_id`; then
++ nvme_device=/dev/`echo $LSBLK|cut -f1 -d' '`
+ logthis "volume $volume_id on filesystem as $nvme_device (aws device $device)"
break
fi
sleep 1
- done
-+ set -e
-
- # set volume delete on termination
- aws ec2 modify-instance-attribute \
-@@ -338,7 +347,7 @@ function create_and_attach_volume() {
+@@ -338,7 +344,7 @@ function create_and_attach_volume() {
> /dev/null
logthis "volume $volume_id DeleteOnTermination ENABLED"
commit 46ea1156952f4ecd9ef939c406af08b3cfe66795
Author: Ward Vandewege <ward at curii.com>
Date: Mon Feb 21 20:37:47 2022 -0500
18772: address review feedback.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
index 033fb93d6..ebecc049a 100644
--- a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
@@ -168,7 +168,40 @@ For @ClusterID@, fill in your cluster ID. The @VPC@ and @Subnet@ should be confi
h3(#aws-ebs-autoscaler). Autoscaling compute node scratch space
-If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-ebs-autoscale@ flag to the "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed. The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance type. For example:
+If you want to add the "AWS EBS autoscaler":https://github.com/awslabs/amazon-ebs-autoscale daemon in your images, add the @--aws-ebs-autoscale@ flag to the "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed.
+
+The AWS EBS autoscaler daemon will be installed with this configuration:
+
+<notextile><pre><code>{
+ "mountpoint": "/tmp",
+ "filesystem": "lvm.ext4",
+ "lvm": {
+ "volume_group": "autoscale_vg",
+ "logical_volume": "autoscale_lv"
+ },
+ "volume": {
+ "type": "gp3",
+ "iops": 3000,
+ "encrypted": 1
+ },
+ "detection_interval": 2,
+ "limits": {
+ "max_ebs_volume_size": 1500,
+ "max_logical_volume_size": 8000,
+ "max_ebs_volume_count": 16
+ },
+ "logging": {
+ "log_file": "/var/log/ebs-autoscale.log",
+ "log_interval": 300
+ }
+}
+</code></pre></notextile>
+
+Changing the configuration is left as an exercise for the reader.
+
+Using this feature also requires a few Arvados configuration changes in @config.yml@:
+
+* The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance type. For example:
<notextile><pre><code> InstanceTypes:
c5large:
@@ -186,6 +219,32 @@ If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-
...
</code></pre></notextile>
+* You will also need to create an IAM role in AWS with these permissions:
+
+<notextile><pre><code>{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "ec2:AttachVolume",
+ "ec2:DescribeVolumeStatus",
+ "ec2:DescribeVolumes",
+ "ec2:DescribeTags",
+ "ec2:ModifyInstanceAttribute",
+ "ec2:DescribeVolumeAttribute",
+ "ec2:CreateVolume",
+ "ec2:DeleteVolume",
+ "ec2:CreateTags"
+ ],
+ "Resource": "*"
+ }
+ ]
+}
+</code></pre></notextile>
+
+Then, in @config.yml@ set @Containers/CloudVMs/DriverParameters/IAMInstanceProfile@ to the name of the IAM role. This will make @arvados-dispatch-cloud@ pass an IAMInstanceProfile to the compute nodes as they start up, giving them sufficient permissions to attach and grow EBS volumes.
+
h2(#azure). Build an Azure image
<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go
index 2cbe4cf29..52b73f781 100644
--- a/lib/cloud/ec2/ec2.go
+++ b/lib/cloud/ec2/ec2.go
@@ -47,7 +47,7 @@ type ec2InstanceSetConfig struct {
SubnetID string
AdminUsername string
EBSVolumeType string
- IamInstanceProfile string
+ IAMInstanceProfile string
}
type ec2Interface interface {
@@ -231,9 +231,9 @@ func (instanceSet *ec2InstanceSet) Create(
}}
}
- if instanceSet.ec2config.IamInstanceProfile != "" {
+ if instanceSet.ec2config.IAMInstanceProfile != "" {
rii.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
- Name: aws.String(instanceSet.ec2config.IamInstanceProfile),
+ Name: aws.String(instanceSet.ec2config.IAMInstanceProfile),
}
}
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index d6b90a6d4..d787bdb52 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1211,9 +1211,9 @@ Clusters:
Region: ""
EBSVolumeType: gp2
AdminUsername: debian
- # (ec2) name of the IamInstanceProfile for instances started by
+ # (ec2) name of the IAMInstanceProfile for instances started by
# the cloud dispatcher. Leave blank when not needed.
- IamInstanceProfile: ""
+ IAMInstanceProfile: ""
# (azure) Credentials.
SubscriptionID: ""
diff --git a/tools/compute-images/scripts/create-ebs-volume-nvme.patch b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
index 1448ae1f2..b6ef81148 100644
--- a/tools/compute-images/scripts/create-ebs-volume-nvme.patch
+++ b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
@@ -4,9 +4,11 @@
Make the create-ebs-volume script work with nvme devices.
---- a/create-ebs-volume 2022-02-18 15:24:19.866607848 -0500
-+++ b/create-ebs-volume 2022-02-18 16:23:17.931870970 -0500
-@@ -149,9 +152,20 @@
+diff --git a/bin/create-ebs-volume b/bin/create-ebs-volume
+index 6857564..efeac35 100755
+--- a/create-ebs-volume
++++ b/create-ebs-volume
+@@ -149,10 +149,11 @@ function get_next_logical_device() {
for letter in ${alphabet[@]}; do
# use /dev/xvdb* device names to avoid contention for /dev/sd* and /dev/xvda names
# only supported by HVM instances
@@ -16,48 +18,40 @@ Make the create-ebs-volume script work with nvme devices.
+ fi
echo "/dev/xvdb${letter}"
break
-+ done
-+}
-+
-+numbers=( {1..255} )
-+function get_next_logical_nvme_device() {
-+ for num in ${numbers[@]}; do
-+ if [ ! -b "/dev/nvme${num}n1" ]; then
-+ echo "/dev/nvme${num}"
-+ break
- fi
+- fi
done
}
-@@ -243,10 +257,12 @@
-
- # check if there are available device names
- local device=$(get_next_logical_device)
-+ local nvme_device=$(get_next_logical_nvme_device)
- if [ -z "$device" ]; then
- error "no device names available for volume"
- fi
- logthis "next available device: $device"
-+ logthis "next available nvme device: $nvme_device"
- # create the volume
- local tmpfile=$(mktemp /tmp/ebs-autoscale.create-volume.XXXXXXXXXX)
-@@ -323,8 +339,8 @@
+@@ -322,13 +323,21 @@ function create_and_attach_volume() {
+ set -e
logthis "waiting for volume $volume_id on filesystem"
++ set +e
while true; do
- if [ -e "$device" ]; then
- logthis "volume $volume_id on filesystem as $device"
-+ if [ -e "$nvme_device" ]; then
++ # AWS returns e.g. vol-00338247831716a7b4, the kernel changes that to vol00338247831716a7b
++ valid_volume_id=`echo $volume_id |sed -e 's/[^a-zA-Z0-9]//'`
++ # example lsblk output:
++ # nvme4n1 259:7 0 150G 0 disk vol00338247831716a7b
++ LSBLK=`lsblk -o +SERIAL |grep $valid_volume_id`
++ if [[ $? -eq 0 ]]; then
++ nvme_device=`echo $LSBLK|cut -f1 -d' '|xargs -I {} echo "/dev/{}"`
+ logthis "volume $volume_id on filesystem as $nvme_device (aws device $device)"
break
fi
sleep 1
-@@ -338,7 +354,7 @@
+ done
++ set -e
+
+ # set volume delete on termination
+ aws ec2 modify-instance-attribute \
+@@ -338,7 +347,7 @@ function create_and_attach_volume() {
> /dev/null
logthis "volume $volume_id DeleteOnTermination ENABLED"
- echo $device
-+ echo "$nvme_device"n1
++ echo "$nvme_device"
}
create_and_attach_volume
commit 4ca18c163d557d14c27abecf86bb637d8009d36d
Author: Ward Vandewege <ward at curii.com>
Date: Mon Feb 21 09:32:08 2022 -0500
18772: document the AWS EBS autoscaler support. Refactor the "Build a
cloud compute node image" documentation page to improve the flow.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
index 131dde599..033fb93d6 100644
--- a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
@@ -17,8 +17,10 @@ SPDX-License-Identifier: CC-BY-SA-3.0
# "Create an SSH keypair":#sshkeypair
# "Compute image requirements":#requirements
# "The build script":#building
+# "DNS resolution":#dns-resolution
# "Singularity mksquashfs configuration":#singularity_mksquashfs_configuration
# "Build an AWS image":#aws
+## "Autoscaling compute node scratch space":#aws-ebs-autoscaler
# "Build an Azure image":#azure
h2(#introduction). Introduction
@@ -56,12 +58,6 @@ foktmqOY8MyctzFgXBpGTxPliGjqo8OkrOyQP2g+FL7v+Km31Xs61P8=
</code></pre>
</notextile>
-{% assign show_docker_warning = true %}
-
-{% include 'singularity_mksquashfs_configuration' %}
-
-The desired amount of memory to make available for @mksquashfs@ can be configured in an argument to "the build script":#building. It defaults to @256M at .
-
h2(#requirements). Compute image requirements
Arvados comes with a build script to automate the creation of a suitable compute node image (see "The build script":#building below). It is provided as a convenience. It is also possible to create a compute node image via other means. These are the requirements:
@@ -101,6 +97,8 @@ Options:
VPC id for AWS, otherwise packer will pick the default one
--aws-subnet-id
Subnet id for AWS otherwise packer will pick the default one for the VPC
+ --aws-ebs-autoscale (default: false)
+ Install the AWS EBS autoscaler daemon.
--gcp-project-id (default: false, required if building for GCP)
GCP project id
--gcp-account-file (default: false, required if building for GCP)
@@ -129,6 +127,25 @@ Options:
Output debug information (default: false)
</code></pre></notextile>
+h2(#dns-resolution). DNS resolution
+
+Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should be passed as the value for the @--resolver@ argument to "the build script":#building.
+
+Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
+
+<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
+10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
+10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
+</code></pre></notextile>
+
+Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.
+
+{% assign show_docker_warning = true %}
+
+{% include 'singularity_mksquashfs_configuration' %}
+
+The desired amount of memory to make available for @mksquashfs@ can be configured in an argument to "the build script":#building. It defaults to @256M at .
+
h2(#aws). Build an AWS image
<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-aws.json \
@@ -149,17 +166,26 @@ For @ClusterID@, fill in your cluster ID. The @VPC@ and @Subnet@ should be confi
@ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
-Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
-
-Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
-
-<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
-10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
-10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
+h3(#aws-ebs-autoscaler). Autoscaling compute node scratch space
+
+If you want to add the AWS EBS autoscaler daemon in your images, add the @--aws-ebs-autoscale@ flag to the "the build script":#building. Doing so will make the compute image scratch space scale automatically as needed. The @Containers/InstanceTypes@ list should be modified so that all @AddedScratch@ lines are removed, and the @IncludedScratch@ value should be set to a (fictional) high number. This way, the scratch space requirements will be met by all the defined instance type. For example:
+
+<notextile><pre><code> InstanceTypes:
+ c5large:
+ ProviderType: c5.large
+ VCPUs: 2
+ RAM: 4GiB
+ IncludedScratch: 16TB
+ Price: 0.085
+ m5large:
+ ProviderType: m5.large
+ VCPUs: 2
+ RAM: 8GiB
+ IncludedScratch: 16TB
+ Price: 0.096
+...
</code></pre></notextile>
-Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.
-
h2(#azure). Build an Azure image
<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
@@ -189,14 +215,3 @@ These secrets can be generated from the Azure portal, or with the cli using a co
</code></pre></notextile>
@ArvadosDispatchCloudPublicKeyPath@ should be replaced with the path to the ssh *public* key file generated in "Create an SSH keypair":#sshkeypair, above.
-
-Compute nodes must be able to resolve the hostnames of the API server and any keepstore servers to your internal IP addresses. You can do this by running an internal DNS resolver. The IP address of the resolver should replace the string @ResolverIP@ in the command above.
-
-Alternatively, the services could be hardcoded into an @/etc/hosts@ file. For example:
-
-<notextile><pre><code>10.20.30.40 <span class="userinput">ClusterID.example.com</span>
-10.20.30.41 <span class="userinput">keep1.ClusterID.example.com</span>
-10.20.30.42 <span class="userinput">keep2.ClusterID.example.com</span>
-</code></pre></notextile>
-
-Adding these lines to the @/etc/hosts@ file in the compute node image could be done with a small change to the Packer template and the @scripts/base.sh@ script, which will be left as an exercise for the reader.
commit 13067dd6ee45bdb57c87da7311e70b42d8094323
Author: Ward Vandewege <ward at curii.com>
Date: Mon Feb 21 09:08:30 2022 -0500
18772: consistently use bash comparison operators in build.sh
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
index 135a4aa3c..33713f384 100755
--- a/tools/compute-images/build.sh
+++ b/tools/compute-images/build.sh
@@ -187,7 +187,7 @@ while [ $# -gt 0 ]; do
done
-if [[ "$JSON_FILE" == "" ]] || [[ ! -f "$JSON_FILE" ]]; then
+if [[ -z "$JSON_FILE" ]] || [[ ! -f "$JSON_FILE" ]]; then
echo >&2 "$helpmessage"
echo >&2
echo >&2 "ERROR: packer json file not found"
@@ -203,7 +203,7 @@ if [[ -z "$ARVADOS_CLUSTER_ID" ]]; then
exit 1
fi
-if [[ "$PUBLIC_KEY_FILE" == "" ]] || [[ ! -f "$PUBLIC_KEY_FILE" ]]; then
+if [[ -z "$PUBLIC_KEY_FILE" ]] || [[ ! -f "$PUBLIC_KEY_FILE" ]]; then
echo >&2 "$helpmessage"
echo >&2
echo >&2 "ERROR: public key file file not found"
@@ -222,62 +222,61 @@ fi
EXTRA2=""
-if [[ "$AWS_SOURCE_AMI" != "" ]]; then
+if [[ -n "$AWS_SOURCE_AMI" ]]; then
EXTRA2+=" -var aws_source_ami=$AWS_SOURCE_AMI"
fi
-if [[ "$AWS_PROFILE" != "" ]]; then
+if [[ -n "$AWS_PROFILE" ]]; then
EXTRA2+=" -var aws_profile=$AWS_PROFILE"
fi
-if [[ "$AWS_VPC_ID" != "" ]]; then
+if [[ -n "$AWS_VPC_ID" ]]; then
EXTRA2+=" -var vpc_id=$AWS_VPC_ID -var associate_public_ip_address=true "
fi
-if [[ "$AWS_SUBNET_ID" != "" ]]; then
+if [[ -n "$AWS_SUBNET_ID" ]]; then
EXTRA2+=" -var subnet_id=$AWS_SUBNET_ID -var associate_public_ip_address=true "
fi
-if [[ "$AWS_DEFAULT_REGION" != "" ]]; then
+if [[ -n "$AWS_DEFAULT_REGION" ]]; then
EXTRA2+=" -var aws_default_region=$AWS_DEFAULT_REGION"
fi
-if [[ "$AWS_EBS_AUTOSCALE" != "" ]]; then
+if [[ -n "$AWS_EBS_AUTOSCALE" ]]; then
EXTRA2+=" -var aws_ebs_autoscale=$AWS_EBS_AUTOSCALE"
fi
-if [[ "$GCP_PROJECT_ID" != "" ]]; then
+if [[ -n "$GCP_PROJECT_ID" ]]; then
EXTRA2+=" -var project_id=$GCP_PROJECT_ID"
fi
-if [[ "$GCP_ACCOUNT_FILE" != "" ]]; then
+if [[ -n "$GCP_ACCOUNT_FILE" ]]; then
EXTRA2+=" -var account_file=$GCP_ACCOUNT_FILE"
fi
-if [[ "$GCP_ZONE" != "" ]]; then
+if [[ -n "$GCP_ZONE" ]]; then
EXTRA2+=" -var zone=$GCP_ZONE"
fi
-if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
+if [[ -n "$AZURE_RESOURCE_GROUP" ]]; then
EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
fi
-if [[ "$AZURE_LOCATION" != "" ]]; then
+if [[ -n "$AZURE_LOCATION" ]]; then
EXTRA2+=" -var location=$AZURE_LOCATION"
fi
-if [[ "$AZURE_SKU" != "" ]]; then
+if [[ -n "$AZURE_SKU" ]]; then
EXTRA2+=" -var image_sku=$AZURE_SKU"
fi
-if [[ "$AZURE_CLOUD_ENVIRONMENT" != "" ]]; then
+if [[ -n "$AZURE_CLOUD_ENVIRONMENT" ]]; then
EXTRA2+=" -var cloud_environment_name=$AZURE_CLOUD_ENVIRONMENT"
fi
-if [[ "$SSH_USER" != "" ]]; then
+if [[ -n "$SSH_USER" ]]; then
EXTRA2+=" -var ssh_user=$SSH_USER"
fi
-if [[ "$RESOLVER" != "" ]]; then
+if [[ -n "$RESOLVER" ]]; then
EXTRA2+=" -var resolver=$RESOLVER"
fi
-if [[ "$REPOSUFFIX" != "" ]]; then
+if [[ -n "$REPOSUFFIX" ]]; then
EXTRA2+=" -var reposuffix=$REPOSUFFIX"
fi
-if [[ "$PUBLIC_KEY_FILE" != "" ]]; then
+if [[ -n "$PUBLIC_KEY_FILE" ]]; then
EXTRA2+=" -var public_key_file=$PUBLIC_KEY_FILE"
fi
-if [[ "$MKSQUASHFS_MEM" != "" ]]; then
+if [[ -n "$MKSQUASHFS_MEM" ]]; then
EXTRA2+=" -var mksquashfs_mem=$MKSQUASHFS_MEM"
fi
-
echo
packer version
echo
commit c7a97e3ae140c48331a27817acc9929a07aa515a
Author: Ward Vandewege <ward at curii.com>
Date: Fri Feb 18 19:27:11 2022 -0500
18772: add support for the AWS EBS autoscale script to the compute node
image builder.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
index b4987f443..c0872b08f 100644
--- a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
+++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
@@ -99,6 +99,32 @@ The <span class="userinput">ImageID</span> value is the compute node image that
</code></pre>
</notextile>
+Example policy for the IAM role used by the cloud dispatcher:
+
+<notextile>
+<pre>
+{
+ "Version": "2012-10-17",
+ "Id": "arvados-dispatch-cloud policy",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "iam:PassRole",
+ "ec2:DescribeKeyPairs",
+ "ec2:ImportKeyPair",
+ "ec2:RunInstances",
+ "ec2:DescribeInstances",
+ "ec2:CreateTags",
+ "ec2:TerminateInstances"
+ ],
+ "Resource": "*"
+ }
+ ]
+}
+</pre>
+</notextile>
+
h4. Minimal configuration example for Azure
Using managed disks:
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index aefbcdf70..d6b90a6d4 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1211,6 +1211,8 @@ Clusters:
Region: ""
EBSVolumeType: gp2
AdminUsername: debian
+ # (ec2) name of the IamInstanceProfile for instances started by
+ # the cloud dispatcher. Leave blank when not needed.
IamInstanceProfile: ""
# (azure) Credentials.
diff --git a/tools/compute-images/arvados-images-aws.json b/tools/compute-images/arvados-images-aws.json
index b1b4c909d..ece8149c3 100644
--- a/tools/compute-images/arvados-images-aws.json
+++ b/tools/compute-images/arvados-images-aws.json
@@ -5,7 +5,8 @@
"aws_access_key": "",
"aws_profile": "",
"aws_secret_key": "",
- "aws_source_ami": "ami-04d70e069399af2e9",
+ "aws_source_ami": "ami-031283ff8a43b021c",
+ "aws_ebs_autoscale": "",
"build_environment": "aws",
"public_key_file": "",
"mksquashfs_mem": "",
@@ -69,6 +70,14 @@
"type": "file",
"source": "scripts/usr-local-bin-ensure-encrypted-partitions.sh",
"destination": "/tmp/usr-local-bin-ensure-encrypted-partitions.sh"
+ },{
+ "type": "file",
+ "source": "scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh",
+ "destination": "/tmp/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh"
+ },{
+ "type": "file",
+ "source": "scripts/create-ebs-volume-nvme.patch",
+ "destination": "/tmp/create-ebs-volume-nvme.patch"
},{
"type": "file",
"source": "{{user `public_key_file`}}",
@@ -77,6 +86,6 @@
"type": "shell",
"execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'",
"script": "scripts/base.sh",
- "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}"]
+ "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}","CLOUD=aws","AWS_EBS_AUTOSCALE={{user `aws_ebs_autoscale`}}"]
}]
}
diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
index 526db4906..135a4aa3c 100755
--- a/tools/compute-images/build.sh
+++ b/tools/compute-images/build.sh
@@ -33,6 +33,8 @@ Options:
VPC id for AWS, otherwise packer will pick the default one
--aws-subnet-id
Subnet id for AWS otherwise packer will pick the default one for the VPC
+ --aws-ebs-autoscale (default: false)
+ Install the AWS EBS autoscaler daemon.
--gcp-project-id (default: false, required if building for GCP)
GCP project id
--gcp-account-file (default: false, required if building for GCP)
@@ -60,6 +62,8 @@ Options:
--debug
Output debug information (default: false)
+For more information, see the Arvados documentation at https://doc.arvados.org/install/crunch2-cloud/install-compute-node.html
+
EOF
JSON_FILE=
@@ -69,6 +73,7 @@ AWS_SECRETS_FILE=
AWS_SOURCE_AMI=
AWS_VPC_ID=
AWS_SUBNET_ID=
+AWS_EBS_AUTOSCALE=
GCP_PROJECT_ID=
GCP_ACCOUNT_FILE=
GCP_ZONE=
@@ -83,7 +88,7 @@ PUBLIC_KEY_FILE=
MKSQUASHFS_MEM=256M
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,debug \
+ help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,aws-ebs-autoscale,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,debug \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
@@ -121,6 +126,9 @@ while [ $# -gt 0 ]; do
--aws-subnet-id)
AWS_SUBNET_ID="$2"; shift
;;
+ --aws-ebs-autoscale)
+ AWS_EBS_AUTOSCALE=1
+ ;;
--gcp-project-id)
GCP_PROJECT_ID="$2"; shift
;;
@@ -229,6 +237,9 @@ fi
if [[ "$AWS_DEFAULT_REGION" != "" ]]; then
EXTRA2+=" -var aws_default_region=$AWS_DEFAULT_REGION"
fi
+if [[ "$AWS_EBS_AUTOSCALE" != "" ]]; then
+ EXTRA2+=" -var aws_ebs_autoscale=$AWS_EBS_AUTOSCALE"
+fi
if [[ "$GCP_PROJECT_ID" != "" ]]; then
EXTRA2+=" -var project_id=$GCP_PROJECT_ID"
fi
diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh
index d6887add5..7d8ad82c2 100644
--- a/tools/compute-images/scripts/base.sh
+++ b/tools/compute-images/scripts/base.sh
@@ -142,8 +142,30 @@ $SUDO chmod 700 /home/crunch/.ssh/
if [ "x$RESOLVER" != "x" ]; then
$SUDO sed -i "s/#prepend domain-name-servers 127.0.0.1;/prepend domain-name-servers ${RESOLVER};/" /etc/dhcp/dhclient.conf
fi
-# Set up the cloud-init script that will ensure encrypted disks
-$SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions.sh /usr/local/bin/ensure-encrypted-partitions.sh
+
+if [ "$AWS_EBS_AUTOSCALE" != "1" ]; then
+ # Set up the cloud-init script that will ensure encrypted disks
+ $SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions.sh /usr/local/bin/ensure-encrypted-partitions.sh
+else
+ wait_for_apt_locks && $SUDO DEBIAN_FRONTEND=noninteractive apt-get -qq --yes install jq unzip
+
+ curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip"
+ unzip -q /tmp/awscliv2.zip -d /tmp && $SUDO /tmp/aws/install
+ # Pinned to v2.4.5 because we apply a patch below
+ #export EBS_AUTOSCALE_VERSION=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest" | jq -r .tag_name)
+ export EBS_AUTOSCALE_VERSION="v2.4.5"
+ cd /opt && $SUDO git clone https://github.com/awslabs/amazon-ebs-autoscale.git
+ cd /opt/amazon-ebs-autoscale && $SUDO git checkout $EBS_AUTOSCALE_VERSION
+ cd bin
+ $SUDO patch -p1 < /tmp/create-ebs-volume-nvme.patch
+
+ # This script really requires bash and the shebang line is wrong
+ $SUDO sed -i 's|^#!/bin/sh|#!/bin/bash|' /opt/amazon-ebs-autoscale/bin/ebs-autoscale
+
+ # Set up the cloud-init script that makes use of the AWS EBS autoscaler
+ $SUDO mv /tmp/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh /usr/local/bin/ensure-encrypted-partitions.sh
+fi
+
$SUDO chmod 755 /usr/local/bin/ensure-encrypted-partitions.sh
$SUDO chown root:root /usr/local/bin/ensure-encrypted-partitions.sh
$SUDO mv /tmp/etc-cloud-cloud.cfg.d-07_compute_arvados_dispatch_cloud.cfg /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg
diff --git a/tools/compute-images/scripts/create-ebs-volume-nvme.patch b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
new file mode 100644
index 000000000..1448ae1f2
--- /dev/null
+++ b/tools/compute-images/scripts/create-ebs-volume-nvme.patch
@@ -0,0 +1,63 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+Make the create-ebs-volume script work with nvme devices.
+
+--- a/create-ebs-volume 2022-02-18 15:24:19.866607848 -0500
++++ b/create-ebs-volume 2022-02-18 16:23:17.931870970 -0500
+@@ -149,9 +152,20 @@
+ for letter in ${alphabet[@]}; do
+ # use /dev/xvdb* device names to avoid contention for /dev/sd* and /dev/xvda names
+ # only supported by HVM instances
+- if [ ! -b "/dev/xvdb${letter}" ]; then
++ if [[ $created_volumes =~ .*/dev/xvdb${letter}.* ]]; then
++ continue
++ fi
+ echo "/dev/xvdb${letter}"
+ break
++ done
++}
++
++numbers=( {1..255} )
++function get_next_logical_nvme_device() {
++ for num in ${numbers[@]}; do
++ if [ ! -b "/dev/nvme${num}n1" ]; then
++ echo "/dev/nvme${num}"
++ break
+ fi
+ done
+ }
+@@ -243,10 +257,12 @@
+
+ # check if there are available device names
+ local device=$(get_next_logical_device)
++ local nvme_device=$(get_next_logical_nvme_device)
+ if [ -z "$device" ]; then
+ error "no device names available for volume"
+ fi
+ logthis "next available device: $device"
++ logthis "next available nvme device: $nvme_device"
+
+ # create the volume
+ local tmpfile=$(mktemp /tmp/ebs-autoscale.create-volume.XXXXXXXXXX)
+@@ -323,8 +339,8 @@
+
+ logthis "waiting for volume $volume_id on filesystem"
+ while true; do
+- if [ -e "$device" ]; then
+- logthis "volume $volume_id on filesystem as $device"
++ if [ -e "$nvme_device" ]; then
++ logthis "volume $volume_id on filesystem as $nvme_device (aws device $device)"
+ break
+ fi
+ sleep 1
+@@ -338,7 +354,7 @@
+ > /dev/null
+ logthis "volume $volume_id DeleteOnTermination ENABLED"
+
+- echo $device
++ echo "$nvme_device"n1
+ }
+
+ create_and_attach_volume
diff --git a/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
new file mode 100644
index 000000000..4b73c8bc4
--- /dev/null
+++ b/tools/compute-images/scripts/usr-local-bin-ensure-encrypted-partitions-aws-ebs-autoscale.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -e
+set -x
+
+MOUNTPATH=/tmp
+
+findmntq() {
+ findmnt "$@" >/dev/null
+}
+
+ensure_umount() {
+ if findmntq "$1"; then
+ umount "$1"
+ fi
+}
+
+# First make sure docker is not using /tmp, then unmount everything under it.
+if [ -d /etc/sv/docker.io ]
+then
+ sv stop docker.io || service stop docker.io || true
+else
+ service docker stop || true
+fi
+
+ensure_umount "$MOUNTPATH/docker/aufs"
+
+/bin/bash /opt/amazon-ebs-autoscale/install.sh -f lvm.ext4 -m $MOUNTPATH 2>&1 > /var/log/ebs-autoscale-install.log
+
+# Make sure docker uses the big partition
+cat <<EOF > /etc/docker/daemon.json
+{
+ "data-root": "$MOUNTPATH/docker-data"
+}
+EOF
+
+# restart docker
+if [ -d /etc/sv/docker.io ]
+then
+ ## runit
+ sv up docker.io
+else
+ service docker start
+fi
+
+end=$((SECONDS+60))
+
+while [ $SECONDS -lt $end ]; do
+ if /usr/bin/docker ps -q >/dev/null; then
+ exit 0
+ fi
+ sleep 1
+done
+
+# Docker didn't start within a minute, abort
+exit 1
commit 28ffc3ab52c5ae7adacf5c7906082d7aa0b6dc54
Author: Ward Vandewege <ward at curii.com>
Date: Thu Feb 17 16:31:23 2022 -0500
18772: arvados-dispatch-cloud: add IamInstanceProfile field to the ec2
driver.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go
index 269a7d8de..2cbe4cf29 100644
--- a/lib/cloud/ec2/ec2.go
+++ b/lib/cloud/ec2/ec2.go
@@ -40,13 +40,14 @@ const (
)
type ec2InstanceSetConfig struct {
- AccessKeyID string
- SecretAccessKey string
- Region string
- SecurityGroupIDs arvados.StringSet
- SubnetID string
- AdminUsername string
- EBSVolumeType string
+ AccessKeyID string
+ SecretAccessKey string
+ Region string
+ SecurityGroupIDs arvados.StringSet
+ SubnetID string
+ AdminUsername string
+ EBSVolumeType string
+ IamInstanceProfile string
}
type ec2Interface interface {
@@ -230,6 +231,12 @@ func (instanceSet *ec2InstanceSet) Create(
}}
}
+ if instanceSet.ec2config.IamInstanceProfile != "" {
+ rii.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
+ Name: aws.String(instanceSet.ec2config.IamInstanceProfile),
+ }
+ }
+
rsv, err := instanceSet.client.RunInstances(&rii)
err = wrapError(err, &instanceSet.throttleDelayCreate)
if err != nil {
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 3a02308e5..aefbcdf70 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1211,6 +1211,7 @@ Clusters:
Region: ""
EBSVolumeType: gp2
AdminUsername: debian
+ IamInstanceProfile: ""
# (azure) Credentials.
SubscriptionID: ""
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list