[ARVADOS] created: 1.3.0-331-g7b5a2c03a

Git user git at public.curoverse.com
Wed Feb 13 02:02:22 EST 2019


        at  7b5a2c03af304bcab57d4928177ebfe9db01b786 (commit)


commit 7b5a2c03af304bcab57d4928177ebfe9db01b786
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:59:42 2019 -0500

    14807: Always set node-token tag.
    
    Azure driver expects it to be set to an unpredictable string.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 96c331bf6..e7aedc54d 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -5,7 +5,9 @@
 package worker
 
 import (
+	"crypto/rand"
 	"errors"
+	"fmt"
 	"io"
 	"sort"
 	"strings"
@@ -22,6 +24,7 @@ import (
 const (
 	tagKeyInstanceType = "InstanceType"
 	tagKeyIdleBehavior = "IdleBehavior"
+	tagKeyNodeToken    = "node-token" // deprecated, but required by Azure driver
 )
 
 // An InstanceView shows a worker's current state and recent activity.
@@ -261,6 +264,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 	tags := cloud.InstanceTags{
 		tagKeyInstanceType: it.Name,
 		tagKeyIdleBehavior: string(IdleBehaviorRun),
+		tagKeyNodeToken:    randomToken(),
 	}
 	now := time.Now()
 	wp.creating[it] = append(wp.creating[it], now)
@@ -777,3 +781,12 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 		go wp.notify()
 	}
 }
+
+func randomToken() string {
+	buf := make([]byte, 32)
+	_, err := rand.Read(buf)
+	if err != nil {
+		panic(err)
+	}
+	return fmt.Sprintf("%x", buf)
+}

commit dc940613ba74d19803321a2f1d641c7ce76cc8ce
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:57:46 2019 -0500

    14807: Log full instance ID.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 2f82b82c2..96c331bf6 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -347,7 +347,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
 
 	logger := wp.logger.WithFields(logrus.Fields{
 		"InstanceType": it.Name,
-		"Instance":     inst,
+		"Instance":     inst.ID(),
 		"Address":      inst.Address(),
 	})
 	logger.WithFields(logrus.Fields{
@@ -487,7 +487,7 @@ func (wp *Pool) KillContainer(uuid string) {
 func (wp *Pool) kill(wkr *worker, uuid string) {
 	logger := wp.logger.WithFields(logrus.Fields{
 		"ContainerUUID": uuid,
-		"Instance":      wkr.instance,
+		"Instance":      wkr.instance.ID(),
 	})
 	logger.Debug("killing process")
 	stdout, stderr, err := wkr.executor.Execute(nil, "crunch-run --kill 15 "+uuid, nil)
@@ -759,7 +759,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
 			continue
 		}
 		logger := wp.logger.WithFields(logrus.Fields{
-			"Instance":    wkr.instance,
+			"Instance":    wkr.instance.ID(),
 			"WorkerState": wkr.state,
 		})
 		logger.Info("instance disappeared in cloud")

commit 9904bb0dd3ea220d6af185f7222ca033a9895acc
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:56:49 2019 -0500

    14807: Expose instance IP addresses in logs and management API.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index b5d89b20a..2f82b82c2 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -27,6 +27,7 @@ const (
 // An InstanceView shows a worker's current state and recent activity.
 type InstanceView struct {
 	Instance             cloud.InstanceID `json:"instance"`
+	Address              string           `json:"address"`
 	Price                float64          `json:"price"`
 	ArvadosInstanceType  string           `json:"arvados_instance_type"`
 	ProviderInstanceType string           `json:"provider_instance_type"`
@@ -347,6 +348,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
 	logger := wp.logger.WithFields(logrus.Fields{
 		"InstanceType": it.Name,
 		"Instance":     inst,
+		"Address":      inst.Address(),
 	})
 	logger.WithFields(logrus.Fields{
 		"State":        initialState,
@@ -676,6 +678,7 @@ func (wp *Pool) Instances() []InstanceView {
 	for _, w := range wp.workers {
 		r = append(r, InstanceView{
 			Instance:             w.instance.ID(),
+			Address:              w.instance.Address(),
 			Price:                w.instType.Price,
 			ArvadosInstanceType:  w.instType.Name,
 			ProviderInstanceType: w.instType.ProviderType,

commit 078d1522a164efc2a7acf8218ac865730cbfb308
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:55:32 2019 -0500

    14807: Fix SSH target address.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index 646e90f31..feed1c2a7 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -183,6 +183,9 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
 	if h, p, err := net.SplitHostPort(addr); err != nil || p == "" {
 		// Target address does not specify a port.  Use
 		// targetPort, or "ssh".
+		if h == "" {
+			h = addr
+		}
 		if p = exr.targetPort; p == "" {
 			p = "ssh"
 		}

commit 4e102006a8fb1734887d0d7f8ce6a81b7675d87d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:52:21 2019 -0500

    14807: Accept .../instances/_/drain?instance_id=X.
    
    Azure Instance IDs contain slashes, so they don't work well as path
    parameters.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index b7ea15933..814dba917 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -212,6 +212,9 @@ func (disp *dispatcher) apiInstanceRun(w http.ResponseWriter, r *http.Request) {
 func (disp *dispatcher) apiInstanceIdleBehavior(w http.ResponseWriter, r *http.Request, want worker.IdleBehavior) {
 	params, _ := r.Context().Value(httprouter.ParamsKey).(httprouter.Params)
 	id := cloud.InstanceID(params.ByName("instance_id"))
+	if qp := r.FormValue("instance_id"); qp != "" {
+		id = cloud.InstanceID(qp)
+	}
 	err := disp.pool.SetIdleBehavior(id, want)
 	if err != nil {
 		httpserver.Error(w, err.Error(), http.StatusNotFound)

commit 485bb7abe728cf3cc1db80b75e12299252e81484
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:49:59 2019 -0500

    14807: Pass SSH public key to driver.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 2d73afcd2..b7ea15933 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -130,7 +130,7 @@ func (disp *dispatcher) initialize() {
 	}
 	disp.instanceSet = instanceSet
 	disp.reg = prometheus.NewRegistry()
-	disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+	disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
 	disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
 
 	if disp.Cluster.ManagementToken == "" {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index e6b506298..b5d89b20a 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -16,6 +16,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/crypto/ssh"
 )
 
 const (
@@ -84,7 +85,7 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
 //
 // New instances are configured and set up according to the given
 // cluster configuration.
-func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
 	wp := &Pool{
 		logger:             logger,
 		arvClient:          arvClient,
@@ -100,6 +101,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
 		timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
 		timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
 		timeoutShutdown:    duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+		installPublicKey:   installPublicKey,
 		stop:               make(chan bool),
 	}
 	wp.registerMetrics(reg)
@@ -130,6 +132,7 @@ type Pool struct {
 	timeoutBooting     time.Duration
 	timeoutProbe       time.Duration
 	timeoutShutdown    time.Duration
+	installPublicKey   ssh.PublicKey
 
 	// private state
 	subscribers  map[<-chan struct{}]chan<- struct{}
@@ -262,7 +265,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
 	wp.creating[it] = append(wp.creating[it], now)
 	go func() {
 		defer wp.notify()
-		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+		inst, err := wp.instanceSet.Create(it, wp.imageID, tags, wp.installPublicKey)
 		wp.mtx.Lock()
 		defer wp.mtx.Unlock()
 		// Remove our timestamp marker from wp.creating
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 3b66eeb41..526bc552c 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -90,7 +90,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 		},
 	}
 
-	pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+	pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
 	notify := pool.Subscribe()
 	defer pool.Unsubscribe(notify)
 	pool.Create(type1)
@@ -108,7 +108,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 
 	c.Log("------- starting new pool, waiting to recover state")
 
-	pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+	pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
 	notify2 := pool2.Subscribe()
 	defer pool2.Unsubscribe(notify2)
 	waitForIdle(pool2, notify2)

commit 6549a071cae733ec66952355a13eb42dbf918876
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:42:40 2019 -0500

    14807: Allow driver to specify SSH username.
    
    Use a non-root account with passwordless sudo on a provider (Azure)
    that can easily set that up, but can't easily set up direct root
    login.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/cloud/azure.go b/lib/cloud/azure.go
index a194b3318..6e46fce5f 100644
--- a/lib/cloud/azure.go
+++ b/lib/cloud/azure.go
@@ -44,6 +44,7 @@ type AzureInstanceSetConfig struct {
 	BlobContainer                string  `mapstructure:"blob_container"`
 	Image                        string  `mapstructure:"image"`
 	DeleteDanglingResourcesAfter float64 `mapstructure:"delete_dangling_resources_after"`
+	AdminUsername                string
 }
 
 type VirtualMachinesClientWrapper interface {
@@ -367,7 +368,7 @@ func (az *AzureInstanceSet) Create(
 		name)
 
 	customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh
-echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
+echo '%s-%s' > '/home/%s/node-token'`, name, newTags["node-token"], az.azconfig.AdminUsername)))
 
 	vmParameters := compute.VirtualMachine{
 		Location: &az.azconfig.Location,
@@ -401,13 +402,13 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
 			},
 			OsProfile: &compute.OSProfile{
 				ComputerName:  &name,
-				AdminUsername: to.StringPtr("crunch"),
+				AdminUsername: to.StringPtr(az.azconfig.AdminUsername),
 				LinuxConfiguration: &compute.LinuxConfiguration{
 					DisablePasswordAuthentication: to.BoolPtr(true),
 					SSH: &compute.SSHConfiguration{
 						PublicKeys: &[]compute.SSHPublicKey{
-							compute.SSHPublicKey{
-								Path:    to.StringPtr("/home/crunch/.ssh/authorized_keys"),
+							{
+								Path:    to.StringPtr("/home/" + az.azconfig.AdminUsername + "/.ssh/authorized_keys"),
 								KeyData: to.StringPtr(string(ssh.MarshalAuthorizedKey(publicKey))),
 							},
 						},
@@ -629,6 +630,10 @@ func (ai *AzureInstance) Address() string {
 	return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
 }
 
+func (ai *AzureInstance) RemoteUser() string {
+	return ai.provider.azconfig.AdminUsername
+}
+
 func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error {
 	ai.provider.stopWg.Add(1)
 	defer ai.provider.stopWg.Done()
@@ -656,7 +661,7 @@ func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Cl
 		return err
 	}
 
-	nodetokenbytes, err := sess.Output("cat /home/crunch/node-token")
+	nodetokenbytes, err := sess.Output("cat /home/" + ai.provider.azconfig.AdminUsername + "/node-token")
 	if err != nil {
 		return err
 	}
diff --git a/lib/cloud/azure_test.go b/lib/cloud/azure_test.go
index f74688bb1..0b8aa8431 100644
--- a/lib/cloud/azure_test.go
+++ b/lib/cloud/azure_test.go
@@ -23,6 +23,7 @@
 // image: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
 // delete_dangling_resources_after: 20
 // authorized_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example at example"
+// AdminUsername: crunch
 
 package cloud
 
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
index 969a4bc2d..3fb776e09 100644
--- a/lib/cloud/interfaces.go
+++ b/lib/cloud/interfaces.go
@@ -62,6 +62,9 @@ type ExecutorTarget interface {
 	// unknown while instance is booting.
 	Address() string
 
+	// Remote username to send during SSH authentication.
+	RemoteUser() string
+
 	// Return nil if the given public key matches the instance's
 	// SSH server key. If the provided Dialer is not nil,
 	// VerifyHostKey can use it to make outgoing network
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index d0fb54c54..646e90f31 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -38,6 +38,7 @@ func New(t cloud.ExecutorTarget) *Executor {
 type Executor struct {
 	target     cloud.ExecutorTarget
 	targetPort string
+	targetUser string
 	signers    []ssh.Signer
 	mtx        sync.RWMutex // controls access to instance after creation
 
@@ -189,7 +190,7 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
 	}
 	var receivedKey ssh.PublicKey
 	client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
-		User: "root",
+		User: target.RemoteUser(),
 		Auth: []ssh.AuthMethod{
 			ssh.PublicKeys(exr.signers...),
 		},
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
index f8565b4a7..e7c023586 100644
--- a/lib/dispatchcloud/ssh_executor/executor_test.go
+++ b/lib/dispatchcloud/ssh_executor/executor_test.go
@@ -73,6 +73,7 @@ func (s *ExecutorSuite) TestBadHostKey(c *check.C) {
 				return 0
 			},
 			HostKey:        hostpriv,
+			AuthorizedUser: "username",
 			AuthorizedKeys: []ssh.PublicKey{clientpub},
 		},
 	}
@@ -121,6 +122,7 @@ func (s *ExecutorSuite) TestExecute(c *check.C) {
 					return uint32(exitcode)
 				},
 				HostKey:        hostpriv,
+				AuthorizedUser: "username",
 				AuthorizedKeys: []ssh.PublicKey{clientpub},
 			},
 		}
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
index ed5995f4c..b81aa6aad 100644
--- a/lib/dispatchcloud/test/ssh_service.go
+++ b/lib/dispatchcloud/test/ssh_service.go
@@ -39,6 +39,7 @@ type SSHExecFunc func(env map[string]string, command string, stdin io.Reader, st
 type SSHService struct {
 	Exec           SSHExecFunc
 	HostKey        ssh.Signer
+	AuthorizedUser string
 	AuthorizedKeys []ssh.PublicKey
 
 	listener net.Listener
@@ -64,6 +65,11 @@ func (ss *SSHService) Address() string {
 	return ln.Addr().String()
 }
 
+// User returns the username that will be accepted.
+func (ss *SSHService) User() string {
+	return ss.AuthorizedUser
+}
+
 // Close shuts down the server and releases resources. Established
 // connections are unaffected.
 func (ss *SSHService) Close() {
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 209d92121..e4f16b41a 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -121,6 +121,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
 	}
 	svm.SSHService = SSHService{
 		HostKey:        sis.driver.HostKey,
+		AuthorizedUser: "root",
 		AuthorizedKeys: ak,
 		Exec:           svm.Exec,
 	}
@@ -327,6 +328,10 @@ func (si stubInstance) Address() string {
 	return si.addr
 }
 
+func (si stubInstance) RemoteUser() string {
+	return si.svm.SSHService.AuthorizedUser
+}
+
 func (si stubInstance) Destroy() error {
 	sis := si.svm.sis
 	if sis.driver.HoldCloudOps {
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index d0810f7a8..4064be058 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -105,6 +105,9 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
 			"ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
 			"ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
 		cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+		if u := wkr.instance.RemoteUser(); u != "root" {
+			cmd = "sudo -E " + cmd
+		}
 		stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
 		wkr.mtx.Lock()
 		defer wkr.mtx.Unlock()
@@ -325,6 +328,9 @@ func (wkr *worker) probeAndUpdate() {
 
 func (wkr *worker) probeRunning() (running []string, ok bool) {
 	cmd := "crunch-run --list"
+	if u := wkr.instance.RemoteUser(); u != "root" {
+		cmd = "sudo " + cmd
+	}
 	stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
 	if err != nil {
 		wkr.logger.WithFields(logrus.Fields{

commit 45113a215e8ced04edf2cf29febe32598e6ad8b0
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:34:43 2019 -0500

    14807: When ProviderType is unspecified, default to Arvados type.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 5491200cb..40d6b6598 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -168,6 +168,9 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
 			if _, ok := (*it)[t.Name]; ok {
 				return errDuplicateInstanceTypeName
 			}
+			if t.ProviderType == "" {
+				t.ProviderType = t.Name
+			}
 			(*it)[t.Name] = t
 		}
 		return nil
@@ -177,10 +180,14 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
 	if err != nil {
 		return err
 	}
-	// Fill in Name field using hash key.
+	// Fill in Name field (and ProviderType field, if not
+	// specified) using hash key.
 	*it = InstanceTypeMap(hash)
 	for name, t := range *it {
 		t.Name = name
+		if t.ProviderType == "" {
+			t.ProviderType = name
+		}
 		(*it)[name] = t
 	}
 	return nil

commit ddcb0fb32f049ae853c502fead4b8ca6696d37ad
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Wed Feb 13 01:32:34 2019 -0500

    14807: Pass env vars on stdin instead of using SSH feature.
    
    Arbitrary environment variables are typically not accepted by SSH
    server configs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index a2231673f..209d92121 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -9,6 +9,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"io/ioutil"
 	math_rand "math/rand"
 	"regexp"
 	"strings"
@@ -200,6 +201,11 @@ func (svm *StubVM) Instance() stubInstance {
 }
 
 func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+	stdinData, err := ioutil.ReadAll(stdin)
+	if err != nil {
+		fmt.Fprintf(stderr, "error reading stdin: %s\n", err)
+		return 1
+	}
 	queue := svm.sis.driver.Queue
 	uuid := regexp.MustCompile(`.{5}-dz642-.{15}`).FindString(command)
 	if eta := svm.Boot.Sub(time.Now()); eta > 0 {
@@ -214,10 +220,17 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
 		fmt.Fprint(stderr, "crunch-run: command not found\n")
 		return 1
 	}
-	if strings.HasPrefix(command, "crunch-run --detach ") {
+	if strings.HasPrefix(command, "source /dev/stdin; crunch-run --detach ") {
+		stdinKV := map[string]string{}
+		for _, line := range strings.Split(string(stdinData), "\n") {
+			kv := strings.SplitN(strings.TrimPrefix(line, "export "), "=", 2)
+			if len(kv) == 2 && len(kv[1]) > 0 {
+				stdinKV[kv[0]] = kv[1]
+			}
+		}
 		for _, name := range []string{"ARVADOS_API_HOST", "ARVADOS_API_TOKEN"} {
-			if env[name] == "" {
-				fmt.Fprintf(stderr, "%s missing from environment %q\n", name, env)
+			if stdinKV[name] == "" {
+				fmt.Fprintf(stderr, "%s env var missing from stdin %q\n", name, stdin)
 				return 1
 			}
 		}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index a24747267..d0810f7a8 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -101,11 +101,11 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
 	wkr.starting[ctr.UUID] = struct{}{}
 	wkr.state = StateRunning
 	go func() {
-		env := map[string]string{
-			"ARVADOS_API_HOST":  wkr.wp.arvClient.APIHost,
-			"ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
-		}
-		stdout, stderr, err := wkr.executor.Execute(env, "crunch-run --detach '"+ctr.UUID+"'", nil)
+		stdin := bytes.NewBufferString(fmt.Sprintf("export %s=%q\nexport %s=%q\n",
+			"ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
+			"ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
+		cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+		stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
 		wkr.mtx.Lock()
 		defer wkr.mtx.Unlock()
 		now := time.Now()

commit ae31f189732b278c9a47961280e746e8951ef76d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Tue Feb 12 22:00:21 2019 -0500

    14807: Match systemd description to component name.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/cmd/arvados-server/arvados-dispatch-cloud.service b/cmd/arvados-server/arvados-dispatch-cloud.service
index 5ea5d45e7..aa5cc3b4a 100644
--- a/cmd/arvados-server/arvados-dispatch-cloud.service
+++ b/cmd/arvados-server/arvados-dispatch-cloud.service
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 [Unit]
-Description=Arvados cloud dispatch
+Description=arvados-dispatch-cloud
 Documentation=https://doc.arvados.org/
 After=network.target
 AssertPathExists=/etc/arvados/config.yml

commit d06417e5820a336e810eba55d02eb936ce18da2a
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Tue Feb 12 14:17:10 2019 -0500

    14325: Start up immediately if there are no stale locks.
    
    ...instead of waiting for the pool to send a notification to trigger
    the first loop iteration.
    
    refs #14325
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 22e3425b6..0558d79f1 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -156,7 +156,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
 		c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
 	}
 
-	deadline := time.Now().Add(time.Second)
+	deadline := time.Now().Add(5 * time.Second)
 	for range time.NewTicker(10 * time.Millisecond).C {
 		insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
 		c.Check(err, check.IsNil)
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
index 264f9e4ec..148b653c2 100644
--- a/lib/dispatchcloud/scheduler/fix_stale_locks.go
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -19,24 +19,15 @@ import (
 func (sch *Scheduler) fixStaleLocks() {
 	wp := sch.pool.Subscribe()
 	defer sch.pool.Unsubscribe(wp)
+
+	var stale []string
 	timeout := time.NewTimer(sch.staleLockTimeout)
 waiting:
-	for {
-		unlock := false
-		select {
-		case <-wp:
-			// If all workers have been contacted, unlock
-			// containers that aren't claimed by any
-			// worker.
-			unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
-		case <-timeout.C:
-			// Give up and unlock the containers, even
-			// though they might be working.
-			unlock = true
-		}
-
+	for sch.pool.CountWorkers()[worker.StateUnknown] > 0 {
 		running := sch.pool.Running()
 		qEntries, _ := sch.queue.Entries()
+
+		stale = nil
 		for uuid, ent := range qEntries {
 			if ent.Container.State != arvados.ContainerStateLocked {
 				continue
@@ -44,14 +35,25 @@ waiting:
 			if _, running := running[uuid]; running {
 				continue
 			}
-			if !unlock {
-				continue waiting
-			}
-			err := sch.queue.Unlock(uuid)
-			if err != nil {
-				sch.logger.Warnf("Unlock %s: %s", uuid, err)
-			}
+			stale = append(stale, uuid)
+		}
+		if len(stale) == 0 {
+			return
+		}
+
+		select {
+		case <-wp:
+		case <-timeout.C:
+			// Give up.
+			break waiting
+		}
+
+	}
+
+	for _, uuid := range stale {
+		err := sch.queue.Unlock(uuid)
+		if err != nil {
+			sch.logger.Warnf("Unlock %s: %s", uuid, err)
 		}
-		return
 	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list