[arvados] created: 2.1.0-2635-g5722e7f91

git repository hosting git at public.arvados.org
Wed Jun 29 17:55:04 UTC 2022


        at  5722e7f91d3ab4df898dec0d301c0653ac7995b3 (commit)


commit 5722e7f91d3ab4df898dec0d301c0653ac7995b3
Author: Tom Clegg <tom at curii.com>
Date:   Thu Mar 17 01:03:28 2022 -0400

    16552: Option to get TLS certificates automatically from LE.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/cmd/arvados-package/install.go b/cmd/arvados-package/install.go
index d8dbdcc4a..f923d5ef8 100644
--- a/cmd/arvados-package/install.go
+++ b/cmd/arvados-package/install.go
@@ -92,6 +92,7 @@ rm /etc/apt/sources.list.d/arvados-local.list
 	if opts.Live != "" {
 		cmd.Args = append(cmd.Args,
 			"--env=domain="+opts.Live,
+			"--env=initargs=-tls=acme",
 			"--env=bootargs=",
 			"--publish=:443:443",
 			"--publish=:4440-4460:4440-4460",
@@ -101,6 +102,7 @@ rm /etc/apt/sources.list.d/arvados-local.list
 	} else {
 		cmd.Args = append(cmd.Args,
 			"--env=domain=localhost",
+			"--env=initargs=-tls=insecure",
 			"--env=bootargs=-shutdown")
 	}
 	cmd.Args = append(cmd.Args,
@@ -122,7 +124,7 @@ eatmydata apt-get install --reinstall -y --no-install-recommends arvados-server-
 SUDO_FORCE_REMOVE=yes apt-get autoremove -y
 
 /etc/init.d/postgresql start
-arvados-server init -cluster-id x1234 -domain=$domain -login=test -insecure
+arvados-server init -cluster-id x1234 -domain=$domain -login=test $initargs
 exec arvados-server boot -listen-host=0.0.0.0 -no-workbench2=false $bootargs
 `)
 	cmd.Stdout = stdout
diff --git a/lib/boot/cert.go b/lib/boot/cert.go
index 916f9f53b..10fd0aa9f 100644
--- a/lib/boot/cert.go
+++ b/lib/boot/cert.go
@@ -6,19 +6,29 @@ package boot
 
 import (
 	"context"
+	"crypto/rsa"
+	"crypto/tls"
+	"crypto/x509"
+	"encoding/pem"
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"net"
+	"net/http"
+	"net/url"
 	"os"
 	"path/filepath"
+	"strings"
+	"time"
+
+	"golang.org/x/crypto/acme"
+	"golang.org/x/crypto/acme/autocert"
 )
 
-// Create a root CA key and use it to make a new server
-// certificate+key pair.
-//
-// In future we'll make one root CA key per host instead of one per
-// cluster, so it only needs to be imported to a browser once for
-// ongoing dev/test usage.
+const stagingDirectoryURL = "https://acme-staging-v02.api.letsencrypt.org/directory"
+
+var errInvalidHost = errors.New("unrecognized target host in incoming TLS request")
+
 type createCertificates struct{}
 
 func (createCertificates) String() string {
@@ -26,8 +36,180 @@ func (createCertificates) String() string {
 }
 
 func (createCertificates) Run(ctx context.Context, fail func(error), super *Supervisor) error {
+	if super.cluster.TLS.Automatic {
+		return bootAutoCert(ctx, fail, super)
+	} else if super.cluster.TLS.Key == "" && super.cluster.TLS.Certificate == "" {
+		return createSelfSignedCert(ctx, fail, super)
+	} else {
+		return nil
+	}
+}
+
+// bootAutoCert uses Let's Encrypt to get certificates for all the
+// domains appearing in ExternalURLs, writes them to files where Nginx
+// can load them, and updates super.cluster.TLS fields (Key and
+// Certificate) to point to those files.
+//
+// It also runs a background task to keep the files up to date.
+//
+// After bootAutoCert returns, other service components will get the
+// certificates they need by reading these files or by using a
+// read-only autocert cache.
+//
+// Currently this only works when port 80 of every ExternalURL domain
+// is routed to this host, i.e., on a single-node cluster. Wildcard
+// domains [for WebDAV] are not supported.
+func bootAutoCert(ctx context.Context, fail func(error), super *Supervisor) error {
+	hosts := map[string]bool{}
+	for _, svc := range super.cluster.Services.Map() {
+		u := url.URL(svc.ExternalURL)
+		if u.Scheme == "https" || u.Scheme == "wss" {
+			hosts[strings.ToLower(u.Hostname())] = true
+		}
+	}
+	mgr := &autocert.Manager{
+		Cache:  autocert.DirCache(super.tempdir + "/autocert"),
+		Prompt: autocert.AcceptTOS,
+		HostPolicy: func(ctx context.Context, host string) error {
+			if hosts[strings.ToLower(host)] {
+				return nil
+			} else {
+				return errInvalidHost
+			}
+		},
+	}
+	if super.cluster.TLS.Staging {
+		mgr.Client = &acme.Client{DirectoryURL: stagingDirectoryURL}
+	}
+	go func() {
+		err := http.ListenAndServe(":80", mgr.HTTPHandler(nil))
+		fail(fmt.Errorf("autocert http-01 challenge handler stopped: %w", err))
+	}()
+	u := url.URL(super.cluster.Services.Controller.ExternalURL)
+	extHost := u.Hostname()
+	update := func() error {
+		for h := range hosts {
+			cert, err := mgr.GetCertificate(&tls.ClientHelloInfo{ServerName: h})
+			if err != nil {
+				return err
+			}
+			if h == extHost {
+				err = writeCert(super.tempdir, "server.key", "server.crt", cert)
+				if err != nil {
+					return err
+				}
+			}
+		}
+		return nil
+	}
+	err := update()
+	if err != nil {
+		return err
+	}
+	go func() {
+		for range time.NewTicker(time.Hour).C {
+			err := update()
+			if err != nil {
+				super.logger.WithError(err).Error("error getting certificate from autocert")
+			}
+		}
+	}()
+	super.cluster.TLS.Key = "file://" + super.tempdir + "/server.key"
+	super.cluster.TLS.Certificate = "file://" + super.tempdir + "/server.crt"
+	return nil
+}
+
+// Save cert chain and key in a format Nginx can read.
+func writeCert(outdir, keyfile, certfile string, cert *tls.Certificate) error {
+	keytmp, err := os.CreateTemp(outdir, keyfile+".tmp.*")
+	if err != nil {
+		return err
+	}
+	defer keytmp.Close()
+	defer os.Remove(keytmp.Name())
+
+	certtmp, err := os.CreateTemp(outdir, certfile+".tmp.*")
+	if err != nil {
+		return err
+	}
+	defer certtmp.Close()
+	defer os.Remove(certtmp.Name())
+
+	switch privkey := cert.PrivateKey.(type) {
+	case *rsa.PrivateKey:
+		err = pem.Encode(keytmp, &pem.Block{
+			Type:  "RSA PRIVATE KEY",
+			Bytes: x509.MarshalPKCS1PrivateKey(privkey),
+		})
+		if err != nil {
+			return err
+		}
+	default:
+		buf, err := x509.MarshalPKCS8PrivateKey(privkey)
+		if err != nil {
+			return err
+		}
+		err = pem.Encode(keytmp, &pem.Block{
+			Type:  "PRIVATE KEY",
+			Bytes: buf,
+		})
+		if err != nil {
+			return err
+		}
+	}
+	err = keytmp.Close()
+	if err != nil {
+		return err
+	}
+
+	for _, cert := range cert.Certificate {
+		err = pem.Encode(certtmp, &pem.Block{
+			Type:  "CERTIFICATE",
+			Bytes: cert,
+		})
+		if err != nil {
+			return err
+		}
+	}
+	err = certtmp.Close()
+	if err != nil {
+		return err
+	}
+
+	err = os.Rename(keytmp.Name(), filepath.Join(outdir, keyfile))
+	if err != nil {
+		return err
+	}
+	err = os.Rename(certtmp.Name(), filepath.Join(outdir, certfile))
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// Create a root CA key and use it to make a new server
+// certificate+key pair.
+//
+// In future we'll make one root CA key per host instead of one per
+// cluster, so it only needs to be imported to a browser once for
+// ongoing dev/test usage.
+func createSelfSignedCert(ctx context.Context, fail func(error), super *Supervisor) error {
+	san := "DNS:localhost,DNS:localhost.localdomain"
+	if net.ParseIP(super.ListenHost) != nil {
+		san += fmt.Sprintf(",IP:%s", super.ListenHost)
+	} else {
+		san += fmt.Sprintf(",DNS:%s", super.ListenHost)
+	}
+	hostname, err := os.Hostname()
+	if err != nil {
+		return fmt.Errorf("hostname: %w", err)
+	}
+	if hostname != super.ListenHost {
+		san += ",DNS:" + hostname
+	}
+
 	// Generate root key
-	err := super.RunProgram(ctx, super.tempdir, runOptions{}, "openssl", "genrsa", "-out", "rootCA.key", "4096")
+	err = super.RunProgram(ctx, super.tempdir, runOptions{}, "openssl", "genrsa", "-out", "rootCA.key", "4096")
 	if err != nil {
 		return err
 	}
@@ -46,18 +228,6 @@ func (createCertificates) Run(ctx context.Context, fail func(error), super *Supe
 	if err != nil {
 		return err
 	}
-	hostname, err := os.Hostname()
-	if err != nil {
-		return fmt.Errorf("hostname: %w", err)
-	}
-	san := "DNS:localhost,DNS:localhost.localdomain,DNS:" + hostname
-	if super.ListenHost == hostname || super.ListenHost == "localhost" {
-		// already have it
-	} else if net.ParseIP(super.ListenHost) != nil {
-		san += fmt.Sprintf(",IP:%s", super.ListenHost)
-	} else {
-		san += fmt.Sprintf(",DNS:%s", super.ListenHost)
-	}
 	conf := append(defaultconf, []byte(fmt.Sprintf("\n[SAN]\nsubjectAltName=%s\n", san))...)
 	err = ioutil.WriteFile(filepath.Join(super.tempdir, "server.cfg"), conf, 0644)
 	if err != nil {
@@ -73,5 +243,7 @@ func (createCertificates) Run(ctx context.Context, fail func(error), super *Supe
 	if err != nil {
 		return err
 	}
+	super.cluster.TLS.Key = "file://" + super.tempdir + "/server.key"
+	super.cluster.TLS.Certificate = "file://" + super.tempdir + "/server.crt"
 	return nil
 }
diff --git a/lib/boot/nginx.go b/lib/boot/nginx.go
index e67bc1d90..48d3bba47 100644
--- a/lib/boot/nginx.go
+++ b/lib/boot/nginx.go
@@ -14,6 +14,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"regexp"
+	"strings"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 )
@@ -42,7 +43,10 @@ func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) er
 	}
 	u := url.URL(super.cluster.Services.Controller.ExternalURL)
 	ctrlHost := u.Hostname()
-	if f, err := os.Open("/var/lib/acme/live/" + ctrlHost + "/privkey"); err == nil {
+	if strings.HasPrefix(super.cluster.TLS.Certificate, "file:/") && strings.HasPrefix(super.cluster.TLS.Key, "file:/") {
+		vars["SSLCERT"] = filepath.Clean(super.cluster.TLS.Certificate[5:])
+		vars["SSLKEY"] = filepath.Clean(super.cluster.TLS.Key[5:])
+	} else if f, err := os.Open("/var/lib/acme/live/" + ctrlHost + "/privkey"); err == nil {
 		f.Close()
 		vars["SSLCERT"] = "/var/lib/acme/live/" + ctrlHost + "/cert"
 		vars["SSLKEY"] = "/var/lib/acme/live/" + ctrlHost + "/privkey"
diff --git a/lib/boot/service.go b/lib/boot/service.go
index 090e85244..b27a74622 100644
--- a/lib/boot/service.go
+++ b/lib/boot/service.go
@@ -35,6 +35,7 @@ func (runner runServiceCommand) Run(ctx context.Context, fail func(error), super
 	if err != nil {
 		return err
 	}
+	super.wait(ctx, createCertificates{})
 	super.wait(ctx, runner.depends...)
 	for u := range runner.svc.InternalURLs {
 		u := u
@@ -82,6 +83,7 @@ func (runner runGoProgram) Run(ctx context.Context, fail func(error), super *Sup
 		return err
 	}
 
+	super.wait(ctx, createCertificates{})
 	super.wait(ctx, runner.depends...)
 	for u := range runner.svc.InternalURLs {
 		u := u
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 472a22c6b..29d9d9cc4 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -900,10 +900,23 @@ Clusters:
       Repositories: /var/lib/arvados/git/repositories
 
     TLS:
+      # Use "file:///var/lib/acme/live/example.com/cert" and ".../privkey"
+      # to load externally managed certificates.
       Certificate: ""
       Key: ""
+
+      # Accept invalid certificates when connecting to servers. Never
+      # use this in production.
       Insecure: false
 
+      # Agree to Let's Encrypt terms of service and obtain
+      # certificates automatically for ExternalURL domains.
+      Automatic: false
+
+      # Use Let's Encrypt staging environment instead of production
+      # environment.
+      Staging: false
+
     Containers:
       # List of supported Docker Registry image formats that compute nodes
       # are able to use. `arv keep docker` will error out if a user tries
diff --git a/lib/install/init.go b/lib/install/init.go
index d2fed1dd7..6954a60d8 100644
--- a/lib/install/init.go
+++ b/lib/install/init.go
@@ -34,7 +34,7 @@ type initCommand struct {
 	Domain             string
 	PostgreSQLPassword string
 	Login              string
-	Insecure           bool
+	TLS                string
 }
 
 func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
@@ -62,7 +62,7 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read
 	flags.StringVar(&initcmd.ClusterID, "cluster-id", "", "cluster `id`, like x1234 for a dev cluster")
 	flags.StringVar(&initcmd.Domain, "domain", hostname, "cluster public DNS `name`, like x1234.arvadosapi.com")
 	flags.StringVar(&initcmd.Login, "login", "", "login `backend`: test, pam, or ''")
-	flags.BoolVar(&initcmd.Insecure, "insecure", false, "accept invalid TLS certificates and configure TrustAllContent (do not use in production!)")
+	flags.StringVar(&initcmd.TLS, "tls", "none", "tls certificate `source`: acme, auto, insecure, or none")
 	if ok, code := cmd.ParseFlags(flags, prog, args, "", stderr); !ok {
 		return code
 	} else if *versionFlag {
@@ -113,8 +113,8 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read
           "http://0.0.0.0:9001/": {}
       Websocket:
         InternalURLs:
-          "http://0.0.0.0:9004/": {}
-        ExternalURL: {{printf "%q" ( print "wss://" .Domain ":4444/websocket" ) }}
+          "http://0.0.0.0:8005/": {}
+        ExternalURL: {{printf "%q" ( print "wss://" .Domain ":4436/" ) }}
       Keepbalance:
         InternalURLs:
           "http://0.0.0.0:9019/": {}
@@ -155,7 +155,7 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read
           "http://0.0.0.0:9011/": {}
     Collections:
       BlobSigningKey: {{printf "%q" ( .RandomHex 50 )}}
-      {{if .Insecure}}
+      {{if eq .TLS "insecure"}}
       TrustAllContent: true
       {{end}}
     Containers:
@@ -171,10 +171,17 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read
         user: arvados
         password: {{printf "%q" .PostgreSQLPassword}}
     SystemRootToken: {{printf "%q" ( .RandomHex 50 )}}
-    {{if .Insecure}}
     TLS:
+      {{if eq .TLS "insecure"}}
       Insecure: true
-    {{end}}
+      {{else if eq .TLS "auto"}}
+      Automatic: true
+      {{else if eq .TLS "acme"}}
+      Certificate: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/cert")}}
+      Key: {{printf "%q" (print "/var/lib/acme/live/" .Domain "/privkey")}}
+      {{else}}
+      {}
+      {{end}}
     Volumes:
       {{.ClusterID}}-nyw5e-000000000000000:
         Driver: Directory
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index 4b640c4e4..20441c2a6 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -159,7 +159,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		Addr: listenURL.Host,
 	}
 	if listenURL.Scheme == "https" || listenURL.Scheme == "wss" {
-		tlsconfig, err := tlsConfigWithCertUpdater(cluster, logger)
+		tlsconfig, err := makeTLSConfig(cluster, logger)
 		if err != nil {
 			logger.WithError(err).Errorf("cannot start %s service on %s", c.svcName, listenURL.String())
 			return 1
diff --git a/lib/service/tls.go b/lib/service/tls.go
index c6307b76a..21cd3e2ac 100644
--- a/lib/service/tls.go
+++ b/lib/service/tls.go
@@ -5,6 +5,7 @@
 package service
 
 import (
+	"context"
 	"crypto/tls"
 	"errors"
 	"fmt"
@@ -12,18 +13,69 @@ import (
 	"os/signal"
 	"strings"
 	"syscall"
+	"time"
 
 	"git.arvados.org/arvados.git/sdk/go/arvados"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/crypto/acme/autocert"
 )
 
-func tlsConfigWithCertUpdater(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) {
+func makeTLSConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) {
+	if cluster.TLS.Automatic {
+		return makeAutocertConfig(cluster, logger)
+	} else {
+		return makeFileLoaderConfig(cluster, logger)
+	}
+}
+
+var errCertUnavailable = errors.New("certificate unavailable, waiting for supervisor to update cache")
+
+type readonlyDirCache autocert.DirCache
+
+func (c readonlyDirCache) Get(ctx context.Context, name string) ([]byte, error) {
+	data, err := autocert.DirCache(c).Get(ctx, name)
+	if err != nil {
+		// Returning an error other than autocert.ErrCacheMiss
+		// causes GetCertificate() to fail early instead of
+		// trying to obtain a certificate itself (which
+		// wouldn't work because we're not in a position to
+		// answer challenges).
+		return nil, errCertUnavailable
+	}
+	return data, nil
+}
+
+func (c readonlyDirCache) Put(ctx context.Context, name string, data []byte) error {
+	return fmt.Errorf("(bug?) (readonlyDirCache)Put(%s) called", name)
+}
+
+func (c readonlyDirCache) Delete(ctx context.Context, name string) error {
+	return nil
+}
+
+func makeAutocertConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) {
+	mgr := &autocert.Manager{
+		Cache:  readonlyDirCache("/var/lib/arvados/tmp/autocert"),
+		Prompt: autocert.AcceptTOS,
+		// HostPolicy accepts all names because this Manager
+		// doesn't request certs. Whoever writes certs to our
+		// cache is effectively responsible for HostPolicy.
+		HostPolicy: func(ctx context.Context, host string) error { return nil },
+		// Keep using whatever's in the cache as long as
+		// possible. Assume some other process (see lib/boot)
+		// handles renewals.
+		RenewBefore: time.Second,
+	}
+	return mgr.TLSConfig(), nil
+}
+
+func makeFileLoaderConfig(cluster *arvados.Cluster, logger logrus.FieldLogger) (*tls.Config, error) {
 	currentCert := make(chan *tls.Certificate, 1)
 	loaded := false
 
-	key, cert := cluster.TLS.Key, cluster.TLS.Certificate
+	key := strings.TrimPrefix(cluster.TLS.Key, "file://")
+	cert := strings.TrimPrefix(cluster.TLS.Certificate, "file://")
 	if !strings.HasPrefix(key, "file://") || !strings.HasPrefix(cert, "file://") {
-		return nil, errors.New("cannot use TLS certificate: TLS.Key and TLS.Certificate must be specified with a 'file://' prefix")
 	}
 	key, cert = key[7:], cert[7:]
 
@@ -45,9 +97,14 @@ func tlsConfigWithCertUpdater(cluster *arvados.Cluster, logger logrus.FieldLogge
 		return nil, err
 	}
 
+	reload := make(chan os.Signal, 1)
+	signal.Notify(reload, syscall.SIGHUP)
+	go func() {
+		for range time.NewTicker(time.Hour).C {
+			reload <- nil
+		}
+	}()
 	go func() {
-		reload := make(chan os.Signal, 1)
-		signal.Notify(reload, syscall.SIGHUP)
 		for range reload {
 			err := update()
 			if err != nil {
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index c90551a61..d9aa92b65 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -227,6 +227,8 @@ type Cluster struct {
 		Certificate string
 		Key         string
 		Insecure    bool
+		Automatic   bool
+		Staging     bool
 	}
 	Users struct {
 		ActivatedUsersAreVisibleToOthers      bool

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list