[ARVADOS] created: 2.1.0-1741-g2e1049531

Git user git at public.arvados.org
Thu Dec 16 19:13:35 UTC 2021


        at  2e1049531cb7389cc5633b47d8a41e602da295f3 (commit)


commit 2e1049531cb7389cc5633b47d8a41e602da295f3
Author: Ward Vandewege <ward at curii.com>
Date:   Thu Dec 16 14:12:49 2021 -0500

    18325: add support for Nvidia GPUs as an optional feature to our compute
           node image builder script, for AWS.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/compute-images/arvados-images-aws.json b/tools/compute-images/arvados-images-aws.json
index b1b4c909d..0865343dc 100644
--- a/tools/compute-images/arvados-images-aws.json
+++ b/tools/compute-images/arvados-images-aws.json
@@ -5,10 +5,11 @@
     "aws_access_key": "",
     "aws_profile": "",
     "aws_secret_key": "",
-    "aws_source_ami": "ami-04d70e069399af2e9",
+    "aws_source_ami": "ami-031283ff8a43b021c",
     "build_environment": "aws",
     "public_key_file": "",
     "mksquashfs_mem": "",
+    "nvidia_gpu_support": "",
     "reposuffix": "",
     "resolver": "",
     "ssh_user": "admin",
@@ -77,6 +78,6 @@
     "type": "shell",
     "execute_command": "sudo -S env {{ .Vars }} /bin/bash '{{ .Path }}'",
     "script": "scripts/base.sh",
-    "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}"]
+    "environment_vars": ["RESOLVER={{user `resolver`}}","REPOSUFFIX={{user `reposuffix`}}","MKSQUASHFS_MEM={{user `mksquashfs_mem`}}","NVIDIA_GPU_SUPPORT={{user `nvidia_gpu_support`}}"]
   }]
 }
diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh
index 526db4906..fce8b1918 100755
--- a/tools/compute-images/build.sh
+++ b/tools/compute-images/build.sh
@@ -57,8 +57,10 @@ Options:
       Path to the public key file that a-d-c will use to log into the compute node
   --mksquashfs-mem (default: 256M)
       Only relevant when using Singularity. This is the amount of memory mksquashfs is allowed to use.
-  --debug
-      Output debug information (default: false)
+  --nvidia-gpu-support (default: false)
+      Install all the necessary tooling for Nvidia GPU support
+  --debug (default: false)
+      Output debug information
 
 EOF
 
@@ -81,9 +83,10 @@ SSH_USER=
 AWS_DEFAULT_REGION=us-east-1
 PUBLIC_KEY_FILE=
 MKSQUASHFS_MEM=256M
+NVIDIA_GPU_SUPPORT=
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,debug \
+    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,resolver:,reposuffix:,public-key-file:,mksquashfs-mem:,nvidia-gpu-support,debug \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -160,6 +163,9 @@ while [ $# -gt 0 ]; do
         --mksquashfs-mem)
             MKSQUASHFS_MEM="$2"; shift
             ;;
+        --nvidia-gpu-support)
+            NVIDIA_GPU_SUPPORT=1
+            ;;
         --debug)
             # If you want to debug a build issue, add the -debug flag to the build
             # command in question.
@@ -265,6 +271,10 @@ fi
 if [[ "$MKSQUASHFS_MEM" != "" ]]; then
   EXTRA2+=" -var mksquashfs_mem=$MKSQUASHFS_MEM"
 fi
+if [[ "$NVIDIA_GPU_SUPPORT" != "" ]]; then
+  EXTRA2+=" -var nvidia_gpu_support=$NVIDIA_GPU_SUPPORT"
+fi
+
 
 
 echo
diff --git a/tools/compute-images/scripts/base.sh b/tools/compute-images/scripts/base.sh
index 0ab51223b..f180f81c4 100644
--- a/tools/compute-images/scripts/base.sh
+++ b/tools/compute-images/scripts/base.sh
@@ -149,3 +149,25 @@ $SUDO chmod 755 /usr/local/bin/ensure-encrypted-partitions.sh
 $SUDO chown root:root /usr/local/bin/ensure-encrypted-partitions.sh
 $SUDO mv /tmp/etc-cloud-cloud.cfg.d-07_compute_arvados_dispatch_cloud.cfg /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg
 $SUDO chown root:root /etc/cloud/cloud.cfg.d/07_compute_arvados_dispatch_cloud.cfg
+
+if [ "$NVIDIA_GPU_SUPPORT" == "1" ]; then
+  DIST=$(. /etc/os-release; echo "$ID$VERSION_ID")
+  # Install a kernel together with its matching headers (same -cloud-amd64 flavour for both)
+  $SUDO apt-get -y install linux-image-cloud-amd64 linux-headers-cloud-amd64
+
+  # Install CUDA from Nvidia's apt repository for this distribution ($DIST is ID+VERSION_ID from os-release)
+  $SUDO apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/$DIST/x86_64/7fa2af80.pub"
+  $SUDO apt-get -y install software-properties-common
+  $SUDO add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/$DIST/x86_64/ /"
+  $SUDO add-apt-repository contrib
+  $SUDO apt-get update
+  $SUDO apt-get -y install cuda
+
+  # Install libnvidia-container, the tooling for Docker/Singularity (signing key first, then the apt source list)
+  curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | \
+    $SUDO apt-key add -
+  curl -s -L "https://nvidia.github.io/libnvidia-container/$DIST/libnvidia-container.list" | \
+    $SUDO tee /etc/apt/sources.list.d/libnvidia-container.list
+  $SUDO apt-get update
+  $SUDO apt-get -y install libnvidia-container1 libnvidia-container-tools nvidia-container-toolkit
+fi

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list