[ARVADOS] updated: 1.3.0-333-g16589cd93

Git user git at public.curoverse.com
Wed Feb 13 14:44:37 EST 2019


Summary of changes:
 lib/dispatchcloud/worker/pool.go   |  6 +++++-
 lib/dispatchcloud/worker/worker.go |  3 ++-
 services/crunch-run/background.go  | 35 ++++++++++++++++++++---------------
 3 files changed, 27 insertions(+), 17 deletions(-)

  discards  014ddd17a11522dfe731c516366b020cb921f279 (commit)
  discards  7b5a2c03af304bcab57d4928177ebfe9db01b786 (commit)
  discards  dc940613ba74d19803321a2f1d641c7ce76cc8ce (commit)
  discards  9904bb0dd3ea220d6af185f7222ca033a9895acc (commit)
  discards  078d1522a164efc2a7acf8218ac865730cbfb308 (commit)
  discards  4e102006a8fb1734887d0d7f8ce6a81b7675d87d (commit)
  discards  485bb7abe728cf3cc1db80b75e12299252e81484 (commit)
  discards  6549a071cae733ec66952355a13eb42dbf918876 (commit)
       via  16589cd93e6780db6a07d7cc110724dea4c19e3e (commit)
       via  2873d55ea38c14f1c4cb711c2f9594fba42d41a8 (commit)
       via  3a1c0395034b52196834923ca31bc6f42aad9ed5 (commit)
       via  6e4237de7a0b595418df84ae2ff4232cf5566aa4 (commit)
       via  d63c18fb87f06fef9276b6c062acea7523ee8a98 (commit)
       via  55c07b5b9959d5d0d830b651a8e40e4151383c67 (commit)
       via  286e41383447c04df0e7f23f74df1495eeaa068f (commit)
       via  554d1808c57ac318710e1a90b25ebc16de3cbab1 (commit)
       via  e57e3e19bab60c71cfa0aaf93ceca7d7e9ec33bf (commit)

This update discarded existing revisions and then added new ones.  That is
to say, the old branch tip is not an ancestor of the new branch tip.  This
situation occurs when you push a change with --force and produce a
repository history that looks something like this:

 * -- * -- B -- O -- O -- O (014ddd17a11522dfe731c516366b020cb921f279)
            \
             N -- N -- N (16589cd93e6780db6a07d7cc110724dea4c19e3e)

When this happens, we assume that you have already received notification
emails for all of the O revisions, so this message reports only the
revisions on the N branch, starting from the common base, B.

The revisions listed above that are new to this repository have not
appeared in any other notification email, so they are listed in full
below.


commit 16589cd93e6780db6a07d7cc110724dea4c19e3e
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 14:38:07 2019 -0500

    14807: Include more detail in errors.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index 0d4612908..334b3ab67 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -49,14 +49,15 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
 			return nil, err
 		}
 		defer dirlock.Close()
-		lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+		lockfilename := filepath.Join(lockdir, lockprefix+uuid+locksuffix)
+		lockfile, err := os.OpenFile(lockfilename, os.O_CREATE|os.O_RDWR, 0700)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("open %s: %s", lockfilename, err)
 		}
 		err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
 		if err != nil {
 			lockfile.Close()
-			return nil, err
+			return nil, fmt.Errorf("lock %s: %s", lockfilename, err)
 		}
 		return lockfile, nil
 	}()
@@ -91,7 +92,7 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
 	if err != nil {
 		os.Remove(outfile.Name())
 		os.Remove(errfile.Name())
-		return err
+		return fmt.Errorf("exec %s: %s", cmd.Path, err)
 	}
 
 	w := io.MultiWriter(stdout, lockfile)
@@ -123,14 +124,14 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
 	if os.IsNotExist(err) {
 		return nil
 	} else if err != nil {
-		return err
+		return fmt.Errorf("open %s: %s", path, err)
 	}
 	defer f.Close()
 
 	var pi procinfo
 	err = json.NewDecoder(f).Decode(&pi)
 	if err != nil {
-		return fmt.Errorf("%s: %s\n", path, err)
+		return fmt.Errorf("decode %s: %s\n", path, err)
 	}
 
 	if pi.UUID != uuid || pi.PID == 0 {
@@ -139,7 +140,7 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
 
 	proc, err := os.FindProcess(pi.PID)
 	if err != nil {
-		return err
+		return fmt.Errorf("%s: find process %d: %s", uuid, pi.PID, err)
 	}
 
 	err = proc.Signal(signal)
@@ -147,9 +148,9 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
 		err = proc.Signal(syscall.Signal(0))
 	}
 	if err == nil {
-		return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal)
+		return fmt.Errorf("%s: pid %d: sent signal %d (%s) but process is still alive", uuid, pi.PID, signal, signal)
 	}
-	fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err)
+	fmt.Fprintf(stderr, "%s: pid %d: %s\n", uuid, pi.PID, err)
 	return nil
 }
 
@@ -189,7 +190,7 @@ func ListProcesses(stdout, stderr io.Writer) int {
 			err := os.Remove(path)
 			dirlock.Close()
 			if err != nil {
-				fmt.Fprintln(stderr, err)
+				fmt.Fprintf(stderr, "unlink %s: %s\n", f.Name(), err)
 			}
 			return nil
 		}
@@ -227,14 +228,15 @@ func exitcode(stderr io.Writer, err error) int {
 //
 // Caller releases the lock by closing the returned file.
 func lockall() (*os.File, error) {
-	f, err := os.OpenFile(filepath.Join(lockdir, lockprefix+"all"+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+	lockfile := filepath.Join(lockdir, lockprefix+"all"+locksuffix)
+	f, err := os.OpenFile(lockfile, os.O_CREATE|os.O_RDWR, 0700)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("open %s: %s", lockfile, err)
 	}
 	err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
 	if err != nil {
 		f.Close()
-		return nil, err
+		return nil, fmt.Errorf("lock %s: %s", lockfile, err)
 	}
 	return f, nil
 }

commit 2873d55ea38c14f1c4cb711c2f9594fba42d41a8
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 11:11:03 2019 -0500

    14807: Fix crunch-run --list output when /var/lock is a symlink.
    
    filepath.Walk(/var/lock) does not return entries inside /var/lock if
    /var/lock is a symlink, as it is on debian:9.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index a50853837..0d4612908 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -155,8 +155,11 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
 
 // List UUIDs of active crunch-run processes.
 func ListProcesses(stdout, stderr io.Writer) int {
-	return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error {
-		if info.IsDir() {
+	// filepath.Walk does not follow symlinks, so we must walk
+	// lockdir+"/." in case lockdir itself is a symlink.
+	walkdir := lockdir + "/."
+	return exitcode(stderr, filepath.Walk(walkdir, func(path string, info os.FileInfo, err error) error {
+		if info.IsDir() && path != walkdir {
 			return filepath.SkipDir
 		}
 		if name := info.Name(); !strings.HasPrefix(name, lockprefix) || !strings.HasSuffix(name, locksuffix) {

commit 3a1c0395034b52196834923ca31bc6f42aad9ed5
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:59:42 2019 -0500

    14807: Always set node-token tag.
    
    Azure driver expects it to be set to an unpredictable string.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a5a454877..6f7163c8c 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -5,7 +5,9 @@
 package worker
 
 import (
+	"crypto/rand"
 	"errors"
+	"fmt"
 	"io"
 	"sort"
 	"strings"
@@ -22,6 +24,7 @@ import (
 const (
 	tagKeyInstanceType = "InstanceType"
 	tagKeyIdleBehavior = "IdleBehavior"
+	tagKeyNodeToken    = "node-token" // deprecated, but required by Azure driver
 )
 
 // An InstanceView shows a worker's current state and recent activity.
@@ -261,6 +264,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 	tags := cloud.InstanceTags{
 		tagKeyInstanceType: it.Name,
 		tagKeyIdleBehavior: string(IdleBehaviorRun),
+		tagKeyNodeToken:    randomToken(),
 	}
 	now := time.Now()
 	wp.creating[it] = append(wp.creating[it], now)
@@ -781,3 +785,12 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 		go wp.notify()
 	}
 }
+
+func randomToken() string {
+	buf := make([]byte, 32)
+	_, err := rand.Read(buf)
+	if err != nil {
+		panic(err)
+	}
+	return fmt.Sprintf("%x", buf)
+}

commit 6e4237de7a0b595418df84ae2ff4232cf5566aa4
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:57:46 2019 -0500

    14807: Log full instance ID.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index d4eca8bcb..a5a454877 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -347,7 +347,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
 
 	logger := wp.logger.WithFields(logrus.Fields{
 		"InstanceType": it.Name,
-		"Instance":     inst,
+		"Instance":     inst.ID(),
 		"Address":      inst.Address(),
 	})
 	logger.WithFields(logrus.Fields{
@@ -487,7 +487,7 @@ func (wp *Pool) KillContainer(uuid string) {
 func (wp *Pool) kill(wkr *worker, uuid string) {
 	logger := wp.logger.WithFields(logrus.Fields{
 		"ContainerUUID": uuid,
-		"Instance":      wkr.instance,
+		"Instance":      wkr.instance.ID(),
 	})
 	logger.Debug("killing process")
 	cmd := "crunch-run --kill 15 " + uuid
@@ -763,7 +763,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 			continue
 		}
 		logger := wp.logger.WithFields(logrus.Fields{
-			"Instance":    wkr.instance,
+			"Instance":    wkr.instance.ID(),
 			"WorkerState": wkr.state,
 		})
 		logger.Info("instance disappeared in cloud")

commit d63c18fb87f06fef9276b6c062acea7523ee8a98
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:56:49 2019 -0500

    14807: Expose instance IP addresses in logs and management API.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 728372ed3..d4eca8bcb 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -27,6 +27,7 @@ const (
 // An InstanceView shows a worker's current state and recent activity.
 type InstanceView struct {
 	Instance             cloud.InstanceID `json:"instance"`
+	Address              string           `json:"address"`
 	Price                float64          `json:"price"`
 	ArvadosInstanceType  string           `json:"arvados_instance_type"`
 	ProviderInstanceType string           `json:"provider_instance_type"`
@@ -347,6 +348,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
 	logger := wp.logger.WithFields(logrus.Fields{
 		"InstanceType": it.Name,
 		"Instance":     inst,
+		"Address":      inst.Address(),
 	})
 	logger.WithFields(logrus.Fields{
 		"State":        initialState,
@@ -680,6 +682,7 @@ func (wp *Pool) Instances() []InstanceView {
 	for _, w := range wp.workers {
 		r = append(r, InstanceView{
 			Instance:             w.instance.ID(),
+			Address:              w.instance.Address(),
 			Price:                w.instType.Price,
 			ArvadosInstanceType:  w.instType.Name,
 			ProviderInstanceType: w.instType.ProviderType,

commit 55c07b5b9959d5d0d830b651a8e40e4151383c67
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:55:32 2019 -0500

    14807: Fix SSH target address.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index 646e90f31..feed1c2a7 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -183,6 +183,9 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
 	if h, p, err := net.SplitHostPort(addr); err != nil || p == "" {
 		// Target address does not specify a port.  Use
 		// targetPort, or "ssh".
+		if h == "" {
+			h = addr
+		}
 		if p = exr.targetPort; p == "" {
 			p = "ssh"
 		}

commit 286e41383447c04df0e7f23f74df1495eeaa068f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:52:21 2019 -0500

    14807: Accept .../instances/_/drain?instance_id=X.
    
    Azure Instance IDs contain slashes, so they don't work well as path
    parameters.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index b7ea15933..814dba917 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -212,6 +212,9 @@ func (disp *dispatcher) apiInstanceRun(w http.ResponseWriter, r *http.Request) {
 func (disp *dispatcher) apiInstanceIdleBehavior(w http.ResponseWriter, r *http.Request, want worker.IdleBehavior) {
 	params, _ := r.Context().Value(httprouter.ParamsKey).(httprouter.Params)
 	id := cloud.InstanceID(params.ByName("instance_id"))
+	if qp := r.FormValue("instance_id"); qp != "" {
+		id = cloud.InstanceID(qp)
+	}
 	err := disp.pool.SetIdleBehavior(id, want)
 	if err != nil {
 		httpserver.Error(w, err.Error(), http.StatusNotFound)

commit 554d1808c57ac318710e1a90b25ebc16de3cbab1
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:49:59 2019 -0500

    14807: Pass SSH public key to driver.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 2d73afcd2..b7ea15933 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -130,7 +130,7 @@ func (disp *dispatcher) initialize() {
 	}
 	disp.instanceSet = instanceSet
 	disp.reg = prometheus.NewRegistry()
-	disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+	disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
 	disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
 
 	if disp.Cluster.ManagementToken == "" {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index df37326a0..728372ed3 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -16,6 +16,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/crypto/ssh"
 )
 
 const (
@@ -84,7 +85,7 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
 //
 // New instances are configured and set up according to the given
 // cluster configuration.
-func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
 	wp := &Pool{
 		logger:             logger,
 		arvClient:          arvClient,
@@ -100,6 +101,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
 		timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
 		timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
 		timeoutShutdown:    duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+		installPublicKey:   installPublicKey,
 		stop:               make(chan bool),
 	}
 	wp.registerMetrics(reg)
@@ -130,6 +132,7 @@ type Pool struct {
 	timeoutBooting     time.Duration
 	timeoutProbe       time.Duration
 	timeoutShutdown    time.Duration
+	installPublicKey   ssh.PublicKey
 
 	// private state
 	subscribers  map[<-chan struct{}]chan<- struct{}
@@ -262,7 +265,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 	wp.creating[it] = append(wp.creating[it], now)
 	go func() {
 		defer wp.notify()
-		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, wp.installPublicKey)
 		wp.mtx.Lock()
 		defer wp.mtx.Unlock()
 		// Remove our timestamp marker from wp.creating
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 3b66eeb41..526bc552c 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -90,7 +90,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 		},
 	}
 
-	pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+	pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
 	notify := pool.Subscribe()
 	defer pool.Unsubscribe(notify)
 	pool.Create(type1)
@@ -108,7 +108,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 
 	c.Log("------- starting new pool, waiting to recover state")
 
-	pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+	pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
 	notify2 := pool2.Subscribe()
 	defer pool2.Unsubscribe(notify2)
 	waitForIdle(pool2, notify2)

commit e57e3e19bab60c71cfa0aaf93ceca7d7e9ec33bf
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:42:40 2019 -0500

    14807: Allow driver to specify SSH username.
    
    Use a non-root account with passwordless sudo on a provider (Azure)
    that can easily set that up, but can't easily set up direct root
    login.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/cloud/azure.go b/lib/cloud/azure.go
index a194b3318..6e46fce5f 100644
--- a/lib/cloud/azure.go
+++ b/lib/cloud/azure.go
@@ -44,6 +44,7 @@ type AzureInstanceSetConfig struct {
 	BlobContainer                string  `mapstructure:"blob_container"`
 	Image                        string  `mapstructure:"image"`
 	DeleteDanglingResourcesAfter float64 `mapstructure:"delete_dangling_resources_after"`
+	AdminUsername                string
 }
 
 type VirtualMachinesClientWrapper interface {
@@ -367,7 +368,7 @@ func (az *AzureInstanceSet) Create(
 		name)
 
 	customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh
-echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
+echo '%s-%s' > '/home/%s/node-token'`, name, newTags["node-token"], az.azconfig.AdminUsername)))
 
 	vmParameters := compute.VirtualMachine{
 		Location: &az.azconfig.Location,
@@ -401,13 +402,13 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
 			},
 			OsProfile: &compute.OSProfile{
 				ComputerName:  &name,
-				AdminUsername: to.StringPtr("crunch"),
+				AdminUsername: to.StringPtr(az.azconfig.AdminUsername),
 				LinuxConfiguration: &compute.LinuxConfiguration{
 					DisablePasswordAuthentication: to.BoolPtr(true),
 					SSH: &compute.SSHConfiguration{
 						PublicKeys: &[]compute.SSHPublicKey{
-							compute.SSHPublicKey{
-								Path:    to.StringPtr("/home/crunch/.ssh/authorized_keys"),
+							{
+								Path:    to.StringPtr("/home/" + az.azconfig.AdminUsername + "/.ssh/authorized_keys"),
 								KeyData: to.StringPtr(string(ssh.MarshalAuthorizedKey(publicKey))),
 							},
 						},
@@ -629,6 +630,10 @@ func (ai *AzureInstance) Address() string {
 	return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
 }
 
+func (ai *AzureInstance) RemoteUser() string {
+	return ai.provider.azconfig.AdminUsername
+}
+
 func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error {
 	ai.provider.stopWg.Add(1)
 	defer ai.provider.stopWg.Done()
@@ -656,7 +661,7 @@ func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Cl
 		return err
 	}
 
-	nodetokenbytes, err := sess.Output("cat /home/crunch/node-token")
+	nodetokenbytes, err := sess.Output("cat /home/" + ai.provider.azconfig.AdminUsername + "/node-token")
 	if err != nil {
 		return err
 	}
diff --git a/lib/cloud/azure_test.go b/lib/cloud/azure_test.go
index f74688bb1..0b8aa8431 100644
--- a/lib/cloud/azure_test.go
+++ b/lib/cloud/azure_test.go
@@ -23,6 +23,7 @@
 // image: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
 // delete_dangling_resources_after: 20
 // authorized_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example at example"
+// AdminUsername: crunch
 
 package cloud
 
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
index 969a4bc2d..3fb776e09 100644
--- a/lib/cloud/interfaces.go
+++ b/lib/cloud/interfaces.go
@@ -62,6 +62,9 @@ type ExecutorTarget interface {
 	// unknown while instance is booting.
 	Address() string
 
+	// Remote username to send during SSH authentication.
+	RemoteUser() string
+
 	// Return nil if the given public key matches the instance's
 	// SSH server key. If the provided Dialer is not nil,
 	// VerifyHostKey can use it to make outgoing network
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index d0fb54c54..646e90f31 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -38,6 +38,7 @@ func New(t cloud.ExecutorTarget) *Executor {
 type Executor struct {
 	target     cloud.ExecutorTarget
 	targetPort string
+	targetUser string
 	signers    []ssh.Signer
 	mtx        sync.RWMutex // controls access to instance after creation
 
@@ -189,7 +190,7 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
 	}
 	var receivedKey ssh.PublicKey
 	client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
-		User: "root",
+		User: target.RemoteUser(),
 		Auth: []ssh.AuthMethod{
 			ssh.PublicKeys(exr.signers...),
 		},
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
index f8565b4a7..e7c023586 100644
--- a/lib/dispatchcloud/ssh_executor/executor_test.go
+++ b/lib/dispatchcloud/ssh_executor/executor_test.go
@@ -73,6 +73,7 @@ func (s *ExecutorSuite) TestBadHostKey(c *check.C) {
 				return 0
 			},
 			HostKey:        hostpriv,
+			AuthorizedUser: "username",
 			AuthorizedKeys: []ssh.PublicKey{clientpub},
 		},
 	}
@@ -121,6 +122,7 @@ func (s *ExecutorSuite) TestExecute(c *check.C) {
 					return uint32(exitcode)
 				},
 				HostKey:        hostpriv,
+				AuthorizedUser: "username",
 				AuthorizedKeys: []ssh.PublicKey{clientpub},
 			},
 		}
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
index ed5995f4c..b81aa6aad 100644
--- a/lib/dispatchcloud/test/ssh_service.go
+++ b/lib/dispatchcloud/test/ssh_service.go
@@ -39,6 +39,7 @@ type SSHExecFunc func(env map[string]string, command string, stdin io.Reader, st
 type SSHService struct {
 	Exec           SSHExecFunc
 	HostKey        ssh.Signer
+	AuthorizedUser string
 	AuthorizedKeys []ssh.PublicKey
 
 	listener net.Listener
@@ -64,6 +65,11 @@ func (ss *SSHService) Address() string {
 	return ln.Addr().String()
 }
 
+// User returns the username that will be accepted.
+func (ss *SSHService) User() string {
+	return ss.AuthorizedUser
+}
+
 // Close shuts down the server and releases resources. Established
 // connections are unaffected.
 func (ss *SSHService) Close() {
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 209d92121..e4f16b41a 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -121,6 +121,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
 	}
 	svm.SSHService = SSHService{
 		HostKey:        sis.driver.HostKey,
+		AuthorizedUser: "root",
 		AuthorizedKeys: ak,
 		Exec:           svm.Exec,
 	}
@@ -327,6 +328,10 @@ func (si stubInstance) Address() string {
 	return si.addr
 }
 
+func (si stubInstance) RemoteUser() string {
+	return si.svm.SSHService.AuthorizedUser
+}
+
 func (si stubInstance) Destroy() error {
 	sis := si.svm.sis
 	if sis.driver.HoldCloudOps {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index e6b506298..df37326a0 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -485,7 +485,11 @@ func (wp *Pool) kill(wkr *worker, uuid string) {
 		"Instance":      wkr.instance,
 	})
 	logger.Debug("killing process")
-	stdout, stderr, err := wkr.executor.Execute(nil, "crunch-run --kill 15 "+uuid, nil)
+	cmd := "crunch-run --kill 15 " + uuid
+	if u := wkr.instance.RemoteUser(); u != "root" {
+		cmd = "sudo " + cmd
+	}
+	stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
 	if err != nil {
 		logger.WithFields(logrus.Fields{
 			"stderr": string(stderr),
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index d0810f7a8..fbb6981ca 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -101,10 +101,14 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
 	wkr.starting[ctr.UUID] = struct{}{}
 	wkr.state = StateRunning
 	go func() {
+		cmd := "crunch-run --detach '" + ctr.UUID + "'"
 		stdin := bytes.NewBufferString(fmt.Sprintf("export %s=%q\nexport %s=%q\n",
 			"ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
 			"ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
-		cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+		if u := wkr.instance.RemoteUser(); u != "root" {
+			cmd = "sudo -E " + cmd
+		}
+		cmd = "source /dev/stdin; " + cmd
 		stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
 		wkr.mtx.Lock()
 		defer wkr.mtx.Unlock()
@@ -325,6 +329,9 @@ func (wkr *worker) probeAndUpdate() {
 
 func (wkr *worker) probeRunning() (running []string, ok bool) {
 	cmd := "crunch-run --list"
+	if u := wkr.instance.RemoteUser(); u != "root" {
+		cmd = "sudo " + cmd
+	}
 	stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
 	if err != nil {
 		wkr.logger.WithFields(logrus.Fields{

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list