[ARVADOS] created: 1.3.0-331-g7b5a2c03a
Git user
git at public.curoverse.com
Wed Feb 13 02:02:22 EST 2019
at 7b5a2c03af304bcab57d4928177ebfe9db01b786 (commit)
commit 7b5a2c03af304bcab57d4928177ebfe9db01b786
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:59:42 2019 -0500
14807: Always set node-token tag.
Azure driver expects it to be set to an unpredictable string.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 96c331bf6..e7aedc54d 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -5,7 +5,9 @@
package worker
import (
+ "crypto/rand"
"errors"
+ "fmt"
"io"
"sort"
"strings"
@@ -22,6 +24,7 @@ import (
const (
tagKeyInstanceType = "InstanceType"
tagKeyIdleBehavior = "IdleBehavior"
+ tagKeyNodeToken = "node-token" // deprecated, but required by Azure driver
)
// An InstanceView shows a worker's current state and recent activity.
@@ -261,6 +264,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
tags := cloud.InstanceTags{
tagKeyInstanceType: it.Name,
tagKeyIdleBehavior: string(IdleBehaviorRun),
+ tagKeyNodeToken: randomToken(),
}
now := time.Now()
wp.creating[it] = append(wp.creating[it], now)
@@ -777,3 +781,12 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
go wp.notify()
}
}
+
+func randomToken() string {
+ buf := make([]byte, 32)
+ _, err := rand.Read(buf)
+ if err != nil {
+ panic(err)
+ }
+ return fmt.Sprintf("%x", buf)
+}
commit dc940613ba74d19803321a2f1d641c7ce76cc8ce
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:57:46 2019 -0500
14807: Log full instance ID.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 2f82b82c2..96c331bf6 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -347,7 +347,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
logger := wp.logger.WithFields(logrus.Fields{
"InstanceType": it.Name,
- "Instance": inst,
+ "Instance": inst.ID(),
"Address": inst.Address(),
})
logger.WithFields(logrus.Fields{
@@ -487,7 +487,7 @@ func (wp *Pool) KillContainer(uuid string) {
func (wp *Pool) kill(wkr *worker, uuid string) {
logger := wp.logger.WithFields(logrus.Fields{
"ContainerUUID": uuid,
- "Instance": wkr.instance,
+ "Instance": wkr.instance.ID(),
})
logger.Debug("killing process")
stdout, stderr, err := wkr.executor.Execute(nil, "crunch-run --kill 15 "+uuid, nil)
@@ -759,7 +759,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
continue
}
logger := wp.logger.WithFields(logrus.Fields{
- "Instance": wkr.instance,
+ "Instance": wkr.instance.ID(),
"WorkerState": wkr.state,
})
logger.Info("instance disappeared in cloud")
commit 9904bb0dd3ea220d6af185f7222ca033a9895acc
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:56:49 2019 -0500
14807: Expose instance IP addresses in logs and management API.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index b5d89b20a..2f82b82c2 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -27,6 +27,7 @@ const (
// An InstanceView shows a worker's current state and recent activity.
type InstanceView struct {
Instance cloud.InstanceID `json:"instance"`
+ Address string `json:"address"`
Price float64 `json:"price"`
ArvadosInstanceType string `json:"arvados_instance_type"`
ProviderInstanceType string `json:"provider_instance_type"`
@@ -347,6 +348,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
logger := wp.logger.WithFields(logrus.Fields{
"InstanceType": it.Name,
"Instance": inst,
+ "Address": inst.Address(),
})
logger.WithFields(logrus.Fields{
"State": initialState,
@@ -676,6 +678,7 @@ func (wp *Pool) Instances() []InstanceView {
for _, w := range wp.workers {
r = append(r, InstanceView{
Instance: w.instance.ID(),
+ Address: w.instance.Address(),
Price: w.instType.Price,
ArvadosInstanceType: w.instType.Name,
ProviderInstanceType: w.instType.ProviderType,
commit 078d1522a164efc2a7acf8218ac865730cbfb308
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:55:32 2019 -0500
14807: Fix SSH target address.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index 646e90f31..feed1c2a7 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -183,6 +183,9 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
if h, p, err := net.SplitHostPort(addr); err != nil || p == "" {
// Target address does not specify a port. Use
// targetPort, or "ssh".
+ if h == "" {
+ h = addr
+ }
if p = exr.targetPort; p == "" {
p = "ssh"
}
commit 4e102006a8fb1734887d0d7f8ce6a81b7675d87d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:52:21 2019 -0500
14807: Accept .../instances/_/drain?instance_id=X.
Azure Instance IDs contain slashes, so they don't work well as path
parameters.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index b7ea15933..814dba917 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -212,6 +212,9 @@ func (disp *dispatcher) apiInstanceRun(w http.ResponseWriter, r *http.Request) {
func (disp *dispatcher) apiInstanceIdleBehavior(w http.ResponseWriter, r *http.Request, want worker.IdleBehavior) {
params, _ := r.Context().Value(httprouter.ParamsKey).(httprouter.Params)
id := cloud.InstanceID(params.ByName("instance_id"))
+ if qp := r.FormValue("instance_id"); qp != "" {
+ id = cloud.InstanceID(qp)
+ }
err := disp.pool.SetIdleBehavior(id, want)
if err != nil {
httpserver.Error(w, err.Error(), http.StatusNotFound)
commit 485bb7abe728cf3cc1db80b75e12299252e81484
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:49:59 2019 -0500
14807: Pass SSH public key to driver.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 2d73afcd2..b7ea15933 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -130,7 +130,7 @@ func (disp *dispatcher) initialize() {
}
disp.instanceSet = instanceSet
disp.reg = prometheus.NewRegistry()
- disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+ disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
if disp.Cluster.ManagementToken == "" {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index e6b506298..b5d89b20a 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -16,6 +16,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvados"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
+ "golang.org/x/crypto/ssh"
)
const (
@@ -84,7 +85,7 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
//
// New instances are configured and set up according to the given
// cluster configuration.
-func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
wp := &Pool{
logger: logger,
arvClient: arvClient,
@@ -100,6 +101,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
timeoutBooting: duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
timeoutProbe: duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
timeoutShutdown: duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+ installPublicKey: installPublicKey,
stop: make(chan bool),
}
wp.registerMetrics(reg)
@@ -130,6 +132,7 @@ type Pool struct {
timeoutBooting time.Duration
timeoutProbe time.Duration
timeoutShutdown time.Duration
+ installPublicKey ssh.PublicKey
// private state
subscribers map[<-chan struct{}]chan<- struct{}
@@ -262,7 +265,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
wp.creating[it] = append(wp.creating[it], now)
go func() {
defer wp.notify()
- inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+ inst, err := wp.instanceSet.Create(it, wp.imageID, tags, wp.installPublicKey)
wp.mtx.Lock()
defer wp.mtx.Unlock()
// Remove our timestamp marker from wp.creating
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 3b66eeb41..526bc552c 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -90,7 +90,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
},
}
- pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+ pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
notify := pool.Subscribe()
defer pool.Unsubscribe(notify)
pool.Create(type1)
@@ -108,7 +108,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
c.Log("------- starting new pool, waiting to recover state")
- pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+ pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
notify2 := pool2.Subscribe()
defer pool2.Unsubscribe(notify2)
waitForIdle(pool2, notify2)
commit 6549a071cae733ec66952355a13eb42dbf918876
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:42:40 2019 -0500
14807: Allow driver to specify SSH username.
Use a non-root account with passwordless sudo on a provider (Azure)
that can easily set that up, but can't easily set up direct root
login.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/cloud/azure.go b/lib/cloud/azure.go
index a194b3318..6e46fce5f 100644
--- a/lib/cloud/azure.go
+++ b/lib/cloud/azure.go
@@ -44,6 +44,7 @@ type AzureInstanceSetConfig struct {
BlobContainer string `mapstructure:"blob_container"`
Image string `mapstructure:"image"`
DeleteDanglingResourcesAfter float64 `mapstructure:"delete_dangling_resources_after"`
+ AdminUsername string
}
type VirtualMachinesClientWrapper interface {
@@ -367,7 +368,7 @@ func (az *AzureInstanceSet) Create(
name)
customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh
-echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
+echo '%s-%s' > '/home/%s/node-token'`, name, newTags["node-token"], az.azconfig.AdminUsername)))
vmParameters := compute.VirtualMachine{
Location: &az.azconfig.Location,
@@ -401,13 +402,13 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
},
OsProfile: &compute.OSProfile{
ComputerName: &name,
- AdminUsername: to.StringPtr("crunch"),
+ AdminUsername: to.StringPtr(az.azconfig.AdminUsername),
LinuxConfiguration: &compute.LinuxConfiguration{
DisablePasswordAuthentication: to.BoolPtr(true),
SSH: &compute.SSHConfiguration{
PublicKeys: &[]compute.SSHPublicKey{
- compute.SSHPublicKey{
- Path: to.StringPtr("/home/crunch/.ssh/authorized_keys"),
+ {
+ Path: to.StringPtr("/home/" + az.azconfig.AdminUsername + "/.ssh/authorized_keys"),
KeyData: to.StringPtr(string(ssh.MarshalAuthorizedKey(publicKey))),
},
},
@@ -629,6 +630,10 @@ func (ai *AzureInstance) Address() string {
return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
}
+func (ai *AzureInstance) RemoteUser() string {
+ return ai.provider.azconfig.AdminUsername
+}
+
func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error {
ai.provider.stopWg.Add(1)
defer ai.provider.stopWg.Done()
@@ -656,7 +661,7 @@ func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Cl
return err
}
- nodetokenbytes, err := sess.Output("cat /home/crunch/node-token")
+ nodetokenbytes, err := sess.Output("cat /home/" + ai.provider.azconfig.AdminUsername + "/node-token")
if err != nil {
return err
}
diff --git a/lib/cloud/azure_test.go b/lib/cloud/azure_test.go
index f74688bb1..0b8aa8431 100644
--- a/lib/cloud/azure_test.go
+++ b/lib/cloud/azure_test.go
@@ -23,6 +23,7 @@
// image: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
// delete_dangling_resources_after: 20
// authorized_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example at example"
+// AdminUsername: crunch
package cloud
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
index 969a4bc2d..3fb776e09 100644
--- a/lib/cloud/interfaces.go
+++ b/lib/cloud/interfaces.go
@@ -62,6 +62,9 @@ type ExecutorTarget interface {
// unknown while instance is booting.
Address() string
+ // Remote username to send during SSH authentication.
+ RemoteUser() string
+
// Return nil if the given public key matches the instance's
// SSH server key. If the provided Dialer is not nil,
// VerifyHostKey can use it to make outgoing network
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index d0fb54c54..646e90f31 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -38,6 +38,7 @@ func New(t cloud.ExecutorTarget) *Executor {
type Executor struct {
target cloud.ExecutorTarget
targetPort string
+ targetUser string
signers []ssh.Signer
mtx sync.RWMutex // controls access to instance after creation
@@ -189,7 +190,7 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
}
var receivedKey ssh.PublicKey
client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
- User: "root",
+ User: target.RemoteUser(),
Auth: []ssh.AuthMethod{
ssh.PublicKeys(exr.signers...),
},
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
index f8565b4a7..e7c023586 100644
--- a/lib/dispatchcloud/ssh_executor/executor_test.go
+++ b/lib/dispatchcloud/ssh_executor/executor_test.go
@@ -73,6 +73,7 @@ func (s *ExecutorSuite) TestBadHostKey(c *check.C) {
return 0
},
HostKey: hostpriv,
+ AuthorizedUser: "username",
AuthorizedKeys: []ssh.PublicKey{clientpub},
},
}
@@ -121,6 +122,7 @@ func (s *ExecutorSuite) TestExecute(c *check.C) {
return uint32(exitcode)
},
HostKey: hostpriv,
+ AuthorizedUser: "username",
AuthorizedKeys: []ssh.PublicKey{clientpub},
},
}
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
index ed5995f4c..b81aa6aad 100644
--- a/lib/dispatchcloud/test/ssh_service.go
+++ b/lib/dispatchcloud/test/ssh_service.go
@@ -39,6 +39,7 @@ type SSHExecFunc func(env map[string]string, command string, stdin io.Reader, st
type SSHService struct {
Exec SSHExecFunc
HostKey ssh.Signer
+ AuthorizedUser string
AuthorizedKeys []ssh.PublicKey
listener net.Listener
@@ -64,6 +65,11 @@ func (ss *SSHService) Address() string {
return ln.Addr().String()
}
+// User returns the username that will be accepted.
+func (ss *SSHService) User() string {
+ return ss.AuthorizedUser
+}
+
// Close shuts down the server and releases resources. Established
// connections are unaffected.
func (ss *SSHService) Close() {
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 209d92121..e4f16b41a 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -121,6 +121,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
}
svm.SSHService = SSHService{
HostKey: sis.driver.HostKey,
+ AuthorizedUser: "root",
AuthorizedKeys: ak,
Exec: svm.Exec,
}
@@ -327,6 +328,10 @@ func (si stubInstance) Address() string {
return si.addr
}
+func (si stubInstance) RemoteUser() string {
+ return si.svm.SSHService.AuthorizedUser
+}
+
func (si stubInstance) Destroy() error {
sis := si.svm.sis
if sis.driver.HoldCloudOps {
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index d0810f7a8..4064be058 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -105,6 +105,9 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
"ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
"ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+ if u := wkr.instance.RemoteUser(); u != "root" {
+ cmd = "sudo -E " + cmd
+ }
stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
@@ -325,6 +328,9 @@ func (wkr *worker) probeAndUpdate() {
func (wkr *worker) probeRunning() (running []string, ok bool) {
cmd := "crunch-run --list"
+ if u := wkr.instance.RemoteUser(); u != "root" {
+ cmd = "sudo " + cmd
+ }
stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
wkr.logger.WithFields(logrus.Fields{
commit 45113a215e8ced04edf2cf29febe32598e6ad8b0
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:34:43 2019 -0500
14807: When ProviderType is unspecified, default to Arvados type.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 5491200cb..40d6b6598 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -168,6 +168,9 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
if _, ok := (*it)[t.Name]; ok {
return errDuplicateInstanceTypeName
}
+ if t.ProviderType == "" {
+ t.ProviderType = t.Name
+ }
(*it)[t.Name] = t
}
return nil
@@ -177,10 +180,14 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
- // Fill in Name field using hash key.
+ // Fill in Name field (and ProviderType field, if not
+ // specified) using hash key.
*it = InstanceTypeMap(hash)
for name, t := range *it {
t.Name = name
+ if t.ProviderType == "" {
+ t.ProviderType = name
+ }
(*it)[name] = t
}
return nil
commit ddcb0fb32f049ae853c502fead4b8ca6696d37ad
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:32:34 2019 -0500
14807: Pass env vars on stdin instead of using SSH feature.
Arbitrary environment variables are typically not accepted by SSH
server configs.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index a2231673f..209d92121 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io"
+ "io/ioutil"
math_rand "math/rand"
"regexp"
"strings"
@@ -200,6 +201,11 @@ func (svm *StubVM) Instance() stubInstance {
}
func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+ stdinData, err := ioutil.ReadAll(stdin)
+ if err != nil {
+ fmt.Fprintf(stderr, "error reading stdin: %s\n", err)
+ return 1
+ }
queue := svm.sis.driver.Queue
uuid := regexp.MustCompile(`.{5}-dz642-.{15}`).FindString(command)
if eta := svm.Boot.Sub(time.Now()); eta > 0 {
@@ -214,10 +220,17 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
fmt.Fprint(stderr, "crunch-run: command not found\n")
return 1
}
- if strings.HasPrefix(command, "crunch-run --detach ") {
+ if strings.HasPrefix(command, "source /dev/stdin; crunch-run --detach ") {
+ stdinKV := map[string]string{}
+ for _, line := range strings.Split(string(stdinData), "\n") {
+ kv := strings.SplitN(strings.TrimPrefix(line, "export "), "=", 2)
+ if len(kv) == 2 && len(kv[1]) > 0 {
+ stdinKV[kv[0]] = kv[1]
+ }
+ }
for _, name := range []string{"ARVADOS_API_HOST", "ARVADOS_API_TOKEN"} {
- if env[name] == "" {
- fmt.Fprintf(stderr, "%s missing from environment %q\n", name, env)
+ if stdinKV[name] == "" {
+ fmt.Fprintf(stderr, "%s env var missing from stdin %q\n", name, stdin)
return 1
}
}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index a24747267..d0810f7a8 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -101,11 +101,11 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
wkr.starting[ctr.UUID] = struct{}{}
wkr.state = StateRunning
go func() {
- env := map[string]string{
- "ARVADOS_API_HOST": wkr.wp.arvClient.APIHost,
- "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
- }
- stdout, stderr, err := wkr.executor.Execute(env, "crunch-run --detach '"+ctr.UUID+"'", nil)
+ stdin := bytes.NewBufferString(fmt.Sprintf("export %s=%q\nexport %s=%q\n",
+ "ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
+ "ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
+ cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+ stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
now := time.Now()
commit ae31f189732b278c9a47961280e746e8951ef76d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Feb 12 22:00:21 2019 -0500
14807: Match systemd description to component name.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/cmd/arvados-server/arvados-dispatch-cloud.service b/cmd/arvados-server/arvados-dispatch-cloud.service
index 5ea5d45e7..aa5cc3b4a 100644
--- a/cmd/arvados-server/arvados-dispatch-cloud.service
+++ b/cmd/arvados-server/arvados-dispatch-cloud.service
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0
[Unit]
-Description=Arvados cloud dispatch
+Description=arvados-dispatch-cloud
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/config.yml
commit d06417e5820a336e810eba55d02eb936ce18da2a
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Feb 12 14:17:10 2019 -0500
14325: Start up immediately if there are no stale locks.
...instead of waiting for the pool to send a notification to trigger
the first loop iteration.
refs #14325
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 22e3425b6..0558d79f1 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -156,7 +156,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
}
- deadline := time.Now().Add(time.Second)
+ deadline := time.Now().Add(5 * time.Second)
for range time.NewTicker(10 * time.Millisecond).C {
insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
c.Check(err, check.IsNil)
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
index 264f9e4ec..148b653c2 100644
--- a/lib/dispatchcloud/scheduler/fix_stale_locks.go
+++ b/lib/dispatchcloud/scheduler/fix_stale_locks.go
@@ -19,24 +19,15 @@ import (
func (sch *Scheduler) fixStaleLocks() {
wp := sch.pool.Subscribe()
defer sch.pool.Unsubscribe(wp)
+
+ var stale []string
timeout := time.NewTimer(sch.staleLockTimeout)
waiting:
- for {
- unlock := false
- select {
- case <-wp:
- // If all workers have been contacted, unlock
- // containers that aren't claimed by any
- // worker.
- unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
- case <-timeout.C:
- // Give up and unlock the containers, even
- // though they might be working.
- unlock = true
- }
-
+ for sch.pool.CountWorkers()[worker.StateUnknown] > 0 {
running := sch.pool.Running()
qEntries, _ := sch.queue.Entries()
+
+ stale = nil
for uuid, ent := range qEntries {
if ent.Container.State != arvados.ContainerStateLocked {
continue
@@ -44,14 +35,25 @@ waiting:
if _, running := running[uuid]; running {
continue
}
- if !unlock {
- continue waiting
- }
- err := sch.queue.Unlock(uuid)
- if err != nil {
- sch.logger.Warnf("Unlock %s: %s", uuid, err)
- }
+ stale = append(stale, uuid)
+ }
+ if len(stale) == 0 {
+ return
+ }
+
+ select {
+ case <-wp:
+ case <-timeout.C:
+ // Give up.
+ break waiting
+ }
+
+ }
+
+ for _, uuid := range stale {
+ err := sch.queue.Unlock(uuid)
+ if err != nil {
+ sch.logger.Warnf("Unlock %s: %s", uuid, err)
}
- return
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list