[ARVADOS] created: 2.1.0-1741-g2e1049531
Git user
git at public.arvados.org
Thu Dec 16 19:13:35 UTC 2021
at 2e1049531cb7389cc5633b47d8a41e602da295f3 (commit)
commit 2e1049531cb7389cc5633b47d8a41e602da295f3
Author: Ward Vandewege <ward at curii.com>
Date: Thu Dec 16 14:12:49 2021 -0500
18325: add support for Nvidia GPUs as an optional feature to our compute
node image builder script, for AWS.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/tools/compute-images/arvados-images-aws.json b/tools/compute-images/arvados-images-aws.json
index b1b4c909d..0865343dc 100644
--- a/tools/compute-images/arvados-images-aws.json
+++ b/tools/compute-images/arvados-images-aws.json
@@ -5,10 +5,11 @@
"aws_access_key": "",
"aws_profile": "",
"aws_secret_key": "",
- "aws_source_ami": "ami-04d70e069399af2e9",
+ "aws_source_ami": "ami-031283ff8a43b021c",
"build_environment": "aws",
"public_key_file": "",
"mksquashfs_mem": "",
+ "nvidia_gpu_support": "",
"reposuffix": "",
"resolver": "",
"ssh_user": "admin",
@@ -77,6 +78,6 @@
"type": "shell",
"execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'",
"script": "scripts/base.sh",
- "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}"]
+ "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}","NVIDIA_GPU_SUPPORT={{user `nvidia_gpu_support`}}"]
}]
}
diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
index 526db4906..fce8b1918 100755
--- a/tools/compute-images/build.sh
+++ b/tools/compute-images/build.sh
@@ -57,8 +57,10 @@ Options:
Path to the public key file that a-d-c will use to log into the compute node
--mksquashfs-mem (default: 256M)
Only relevant when using Singularity. This is the amount of memory mksquashfs is allowed to use.
- --debug
- Output debug information (default: false)
+ --nvidia-gpu-support (default: false)
+ Install all the necessary tooling for Nvidia GPU support
+ --debug (default: false)
+ Output debug information
EOF
@@ -81,9 +83,10 @@ SSH_USER=
AWS_DEFAULT_REGION=us-east-1
PUBLIC_KEY_FILE=
MKSQUASHFS_MEM=256M
+NVIDIA_GPU_SUPPORT=
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,debug \
+ help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,nvidia-gpu-support,debug \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
@@ -160,6 +163,9 @@ while [ $# -gt 0 ]; do
--mksquashfs-mem)
MKSQUASHFS_MEM="$2"; shift
;;
+ --nvidia-gpu-support)
+ NVIDIA_GPU_SUPPORT=1
+ ;;
--debug)
# If you want to debug a build issue, add the -debug flag to the build
# command in question.
@@ -265,6 +271,10 @@ fi
if [[ "$MKSQUASHFS_MEM" != "" ]]; then
EXTRA2+=" -var mksquashfs_mem=$MKSQUASHFS_MEM"
fi
+if [[ "$NVIDIA_GPU_SUPPORT" != "" ]]; then
+ EXTRA2+=" -var nvidia_gpu_support=$NVIDIA_GPU_SUPPORT"
+fi
+
echo
diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh
index 0ab51223b..f180f81c4 100644
--- a/tools/compute-images/scripts/base.sh
+++ b/tools/compute-images/scripts/base.sh
@@ -149,3 +149,25 @@ $SUDO chmod 755 /usr/local/bin/ensure-encrypted-partitions.sh
$SUDO chown root:root /usr/local/bin/ensure-encrypted-partitions.sh
$SUDO mv /tmp/etc-cloud-cloud.cfg.d-07_compute_arvados_dispatch_cloud.cfg /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg
$SUDO chown root:root /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg
+
+if [ "$NVIDIA_GPU_SUPPORT" == "1" ]; then # optional; build.sh --nvidia-gpu-support sets this to 1
+ DIST=$(. /etc/os-release; echo "$ID$VERSION_ID") # distro id + version, used in the repo URLs below
+ # We need a kernel and matching headers (privileged steps use $SUDO, like the rest of this script)
+ $SUDO apt-get -y install linux-image-cloud-amd64 linux-headers-cloud-amd64
+
+ # Install CUDA from Nvidia's apt repository for $DIST
+ $SUDO apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/$DIST/x86_64/7fa2af80.pub"
+ $SUDO apt-get -y install software-properties-common
+ $SUDO add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/$DIST/x86_64/ /"
+ $SUDO add-apt-repository contrib
+ $SUDO apt-get update
+ $SUDO apt-get -y install cuda
+
+ # Install libnvidia-container, the tooling for Docker/Singularity
+ curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | \
+ $SUDO apt-key add -
+ curl -s -L "https://nvidia.github.io/libnvidia-container/$DIST/libnvidia-container.list" | \
+ $SUDO tee /etc/apt/sources.list.d/libnvidia-container.list
+ $SUDO apt-get update
+ $SUDO apt-get -y install libnvidia-container1 libnvidia-container-tools nvidia-container-toolkit
+fi
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list