[ARVADOS] updated: 1.3.0-333-g16589cd93
Git user
git at public.curoverse.com
Wed Feb 13 14:44:37 EST 2019
Summary of changes:
lib/dispatchcloud/worker/pool.go | 6 +++++-
lib/dispatchcloud/worker/worker.go | 3 ++-
services/crunch-run/background.go | 35 ++++++++++++++++++++---------------
3 files changed, 27 insertions(+), 17 deletions(-)
discards 014ddd17a11522dfe731c516366b020cb921f279 (commit)
discards 7b5a2c03af304bcab57d4928177ebfe9db01b786 (commit)
discards dc940613ba74d19803321a2f1d641c7ce76cc8ce (commit)
discards 9904bb0dd3ea220d6af185f7222ca033a9895acc (commit)
discards 078d1522a164efc2a7acf8218ac865730cbfb308 (commit)
discards 4e102006a8fb1734887d0d7f8ce6a81b7675d87d (commit)
discards 485bb7abe728cf3cc1db80b75e12299252e81484 (commit)
discards 6549a071cae733ec66952355a13eb42dbf918876 (commit)
via 16589cd93e6780db6a07d7cc110724dea4c19e3e (commit)
via 2873d55ea38c14f1c4cb711c2f9594fba42d41a8 (commit)
via 3a1c0395034b52196834923ca31bc6f42aad9ed5 (commit)
via 6e4237de7a0b595418df84ae2ff4232cf5566aa4 (commit)
via d63c18fb87f06fef9276b6c062acea7523ee8a98 (commit)
via 55c07b5b9959d5d0d830b651a8e40e4151383c67 (commit)
via 286e41383447c04df0e7f23f74df1495eeaa068f (commit)
via 554d1808c57ac318710e1a90b25ebc16de3cbab1 (commit)
via e57e3e19bab60c71cfa0aaf93ceca7d7e9ec33bf (commit)
This update added new revisions after undoing existing revisions. That is
to say, the old revision is not a strict subset of the new revision. This
situation occurs when you --force push a change and generate a repository
containing something like this:
* -- * -- B -- O -- O -- O (014ddd17a11522dfe731c516366b020cb921f279)
           \
            N -- N -- N (16589cd93e6780db6a07d7cc110724dea4c19e3e)
When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 16589cd93e6780db6a07d7cc110724dea4c19e3e
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 14:38:07 2019 -0500
14807: Include more detail in errors.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index 0d4612908..334b3ab67 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -49,14 +49,15 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
return nil, err
}
defer dirlock.Close()
- lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+ lockfilename := filepath.Join(lockdir, lockprefix+uuid+locksuffix)
+ lockfile, err := os.OpenFile(lockfilename, os.O_CREATE|os.O_RDWR, 0700)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("open %s: %s", lockfilename, err)
}
err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
if err != nil {
lockfile.Close()
- return nil, err
+ return nil, fmt.Errorf("lock %s: %s", lockfilename, err)
}
return lockfile, nil
}()
@@ -91,7 +92,7 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
if err != nil {
os.Remove(outfile.Name())
os.Remove(errfile.Name())
- return err
+ return fmt.Errorf("exec %s: %s", cmd.Path, err)
}
w := io.MultiWriter(stdout, lockfile)
@@ -123,14 +124,14 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
if os.IsNotExist(err) {
return nil
} else if err != nil {
- return err
+ return fmt.Errorf("open %s: %s", path, err)
}
defer f.Close()
var pi procinfo
err = json.NewDecoder(f).Decode(&pi)
if err != nil {
- return fmt.Errorf("%s: %s\n", path, err)
+ return fmt.Errorf("decode %s: %s\n", path, err)
}
if pi.UUID != uuid || pi.PID == 0 {
@@ -139,7 +140,7 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
proc, err := os.FindProcess(pi.PID)
if err != nil {
- return err
+ return fmt.Errorf("%s: find process %d: %s", uuid, pi.PID, err)
}
err = proc.Signal(signal)
@@ -147,9 +148,9 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
err = proc.Signal(syscall.Signal(0))
}
if err == nil {
- return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal)
+ return fmt.Errorf("%s: pid %d: sent signal %d (%s) but process is still alive", uuid, pi.PID, signal, signal)
}
- fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err)
+ fmt.Fprintf(stderr, "%s: pid %d: %s\n", uuid, pi.PID, err)
return nil
}
@@ -189,7 +190,7 @@ func ListProcesses(stdout, stderr io.Writer) int {
err := os.Remove(path)
dirlock.Close()
if err != nil {
- fmt.Fprintln(stderr, err)
+ fmt.Fprintf(stderr, "unlink %s: %s\n", f.Name(), err)
}
return nil
}
@@ -227,14 +228,15 @@ func exitcode(stderr io.Writer, err error) int {
//
// Caller releases the lock by closing the returned file.
func lockall() (*os.File, error) {
- f, err := os.OpenFile(filepath.Join(lockdir, lockprefix+"all"+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+ lockfile := filepath.Join(lockdir, lockprefix+"all"+locksuffix)
+ f, err := os.OpenFile(lockfile, os.O_CREATE|os.O_RDWR, 0700)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("open %s: %s", lockfile, err)
}
err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
if err != nil {
f.Close()
- return nil, err
+ return nil, fmt.Errorf("lock %s: %s", lockfile, err)
}
return f, nil
}
commit 2873d55ea38c14f1c4cb711c2f9594fba42d41a8
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 11:11:03 2019 -0500
14807: Fix crunch-run --list output when /var/lock is a symlink.
filepath.Walk(/var/lock) does not return entries inside /var/lock if
/var/lock is a symlink, as it is on debian:9.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index a50853837..0d4612908 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -155,8 +155,11 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
// List UUIDs of active crunch-run processes.
func ListProcesses(stdout, stderr io.Writer) int {
- return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error {
- if info.IsDir() {
+ // filepath.Walk does not follow symlinks, so we must walk
+ // lockdir+"/." in case lockdir itself is a symlink.
+ walkdir := lockdir + "/."
+ return exitcode(stderr, filepath.Walk(walkdir, func(path string, info os.FileInfo, err error) error {
+ if info.IsDir() && path != walkdir {
return filepath.SkipDir
}
if name := info.Name(); !strings.HasPrefix(name, lockprefix) || !strings.HasSuffix(name, locksuffix) {
commit 3a1c0395034b52196834923ca31bc6f42aad9ed5
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:59:42 2019 -0500
14807: Always set node-token tag.
Azure driver expects it to be set to an unpredictable string.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index a5a454877..6f7163c8c 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -5,7 +5,9 @@
package worker
import (
+ "crypto/rand"
"errors"
+ "fmt"
"io"
"sort"
"strings"
@@ -22,6 +24,7 @@ import (
const (
tagKeyInstanceType = "InstanceType"
tagKeyIdleBehavior = "IdleBehavior"
+ tagKeyNodeToken = "node-token" // deprecated, but required by Azure driver
)
// An InstanceView shows a worker's current state and recent activity.
@@ -261,6 +264,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
tags := cloud.InstanceTags{
tagKeyInstanceType: it.Name,
tagKeyIdleBehavior: string(IdleBehaviorRun),
+ tagKeyNodeToken: randomToken(),
}
now := time.Now()
wp.creating[it] = append(wp.creating[it], now)
@@ -781,3 +785,12 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
go wp.notify()
}
}
+
+func randomToken() string {
+ buf := make([]byte, 32)
+ _, err := rand.Read(buf)
+ if err != nil {
+ panic(err)
+ }
+ return fmt.Sprintf("%x", buf)
+}
commit 6e4237de7a0b595418df84ae2ff4232cf5566aa4
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:57:46 2019 -0500
14807: Log full instance ID.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index d4eca8bcb..a5a454877 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -347,7 +347,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
logger := wp.logger.WithFields(logrus.Fields{
"InstanceType": it.Name,
- "Instance": inst,
+ "Instance": inst.ID(),
"Address": inst.Address(),
})
logger.WithFields(logrus.Fields{
@@ -487,7 +487,7 @@ func (wp *Pool) KillContainer(uuid string) {
func (wp *Pool) kill(wkr *worker, uuid string) {
logger := wp.logger.WithFields(logrus.Fields{
"ContainerUUID": uuid,
- "Instance": wkr.instance,
+ "Instance": wkr.instance.ID(),
})
logger.Debug("killing process")
cmd := "crunch-run --kill 15 " + uuid
@@ -763,7 +763,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
continue
}
logger := wp.logger.WithFields(logrus.Fields{
- "Instance": wkr.instance,
+ "Instance": wkr.instance.ID(),
"WorkerState": wkr.state,
})
logger.Info("instance disappeared in cloud")
commit d63c18fb87f06fef9276b6c062acea7523ee8a98
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:56:49 2019 -0500
14807: Expose instance IP addresses in logs and management API.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 728372ed3..d4eca8bcb 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -27,6 +27,7 @@ const (
// An InstanceView shows a worker's current state and recent activity.
type InstanceView struct {
Instance cloud.InstanceID `json:"instance"`
+ Address string `json:"address"`
Price float64 `json:"price"`
ArvadosInstanceType string `json:"arvados_instance_type"`
ProviderInstanceType string `json:"provider_instance_type"`
@@ -347,6 +348,7 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
logger := wp.logger.WithFields(logrus.Fields{
"InstanceType": it.Name,
"Instance": inst,
+ "Address": inst.Address(),
})
logger.WithFields(logrus.Fields{
"State": initialState,
@@ -680,6 +682,7 @@ func (wp *Pool) Instances() []InstanceView {
for _, w := range wp.workers {
r = append(r, InstanceView{
Instance: w.instance.ID(),
+ Address: w.instance.Address(),
Price: w.instType.Price,
ArvadosInstanceType: w.instType.Name,
ProviderInstanceType: w.instType.ProviderType,
commit 55c07b5b9959d5d0d830b651a8e40e4151383c67
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:55:32 2019 -0500
14807: Fix SSH target address.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index 646e90f31..feed1c2a7 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -183,6 +183,9 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
if h, p, err := net.SplitHostPort(addr); err != nil || p == "" {
// Target address does not specify a port. Use
// targetPort, or "ssh".
+ if h == "" {
+ h = addr
+ }
if p = exr.targetPort; p == "" {
p = "ssh"
}
commit 286e41383447c04df0e7f23f74df1495eeaa068f
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:52:21 2019 -0500
14807: Accept .../instances/_/drain?instance_id=X.
Azure Instance IDs contain slashes, so they don't work well as path
parameters.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index b7ea15933..814dba917 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -212,6 +212,9 @@ func (disp *dispatcher) apiInstanceRun(w http.ResponseWriter, r *http.Request) {
func (disp *dispatcher) apiInstanceIdleBehavior(w http.ResponseWriter, r *http.Request, want worker.IdleBehavior) {
params, _ := r.Context().Value(httprouter.ParamsKey).(httprouter.Params)
id := cloud.InstanceID(params.ByName("instance_id"))
+ if qp := r.FormValue("instance_id"); qp != "" {
+ id = cloud.InstanceID(qp)
+ }
err := disp.pool.SetIdleBehavior(id, want)
if err != nil {
httpserver.Error(w, err.Error(), http.StatusNotFound)
commit 554d1808c57ac318710e1a90b25ebc16de3cbab1
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:49:59 2019 -0500
14807: Pass SSH public key to driver.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 2d73afcd2..b7ea15933 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -130,7 +130,7 @@ func (disp *dispatcher) initialize() {
}
disp.instanceSet = instanceSet
disp.reg = prometheus.NewRegistry()
- disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+ disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
if disp.Cluster.ManagementToken == "" {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index df37326a0..728372ed3 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -16,6 +16,7 @@ import (
"git.curoverse.com/arvados.git/sdk/go/arvados"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
+ "golang.org/x/crypto/ssh"
)
const (
@@ -84,7 +85,7 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
//
// New instances are configured and set up according to the given
// cluster configuration.
-func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
wp := &Pool{
logger: logger,
arvClient: arvClient,
@@ -100,6 +101,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
timeoutBooting: duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
timeoutProbe: duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
timeoutShutdown: duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+ installPublicKey: installPublicKey,
stop: make(chan bool),
}
wp.registerMetrics(reg)
@@ -130,6 +132,7 @@ type Pool struct {
timeoutBooting time.Duration
timeoutProbe time.Duration
timeoutShutdown time.Duration
+ installPublicKey ssh.PublicKey
// private state
subscribers map[<-chan struct{}]chan<- struct{}
@@ -262,7 +265,7 @@ func (wp *Pool) Create(it arvados.InstanceType) bool {
wp.creating[it] = append(wp.creating[it], now)
go func() {
defer wp.notify()
- inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+ inst, err := wp.instanceSet.Create(it, wp.imageID, tags, wp.installPublicKey)
wp.mtx.Lock()
defer wp.mtx.Unlock()
// Remove our timestamp marker from wp.creating
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 3b66eeb41..526bc552c 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -90,7 +90,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
},
}
- pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+ pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
notify := pool.Subscribe()
defer pool.Unsubscribe(notify)
pool.Create(type1)
@@ -108,7 +108,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
c.Log("------- starting new pool, waiting to recover state")
- pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, cluster)
+ pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
notify2 := pool2.Subscribe()
defer pool2.Unsubscribe(notify2)
waitForIdle(pool2, notify2)
commit e57e3e19bab60c71cfa0aaf93ceca7d7e9ec33bf
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Feb 13 01:42:40 2019 -0500
14807: Allow driver to specify SSH username.
Use a non-root account with passwordless sudo on a provider (Azure)
that can easily set that up, but can't easily set up direct root
login.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/cloud/azure.go b/lib/cloud/azure.go
index a194b3318..6e46fce5f 100644
--- a/lib/cloud/azure.go
+++ b/lib/cloud/azure.go
@@ -44,6 +44,7 @@ type AzureInstanceSetConfig struct {
BlobContainer string `mapstructure:"blob_container"`
Image string `mapstructure:"image"`
DeleteDanglingResourcesAfter float64 `mapstructure:"delete_dangling_resources_after"`
+ AdminUsername string
}
type VirtualMachinesClientWrapper interface {
@@ -367,7 +368,7 @@ func (az *AzureInstanceSet) Create(
name)
customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh
-echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
+echo '%s-%s' > '/home/%s/node-token'`, name, newTags["node-token"], az.azconfig.AdminUsername)))
vmParameters := compute.VirtualMachine{
Location: &az.azconfig.Location,
@@ -401,13 +402,13 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
},
OsProfile: &compute.OSProfile{
ComputerName: &name,
- AdminUsername: to.StringPtr("crunch"),
+ AdminUsername: to.StringPtr(az.azconfig.AdminUsername),
LinuxConfiguration: &compute.LinuxConfiguration{
DisablePasswordAuthentication: to.BoolPtr(true),
SSH: &compute.SSHConfiguration{
PublicKeys: &[]compute.SSHPublicKey{
- compute.SSHPublicKey{
- Path: to.StringPtr("/home/crunch/.ssh/authorized_keys"),
+ {
+ Path: to.StringPtr("/home/" + az.azconfig.AdminUsername + "/.ssh/authorized_keys"),
KeyData: to.StringPtr(string(ssh.MarshalAuthorizedKey(publicKey))),
},
},
@@ -629,6 +630,10 @@ func (ai *AzureInstance) Address() string {
return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
}
+func (ai *AzureInstance) RemoteUser() string {
+ return ai.provider.azconfig.AdminUsername
+}
+
func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error {
ai.provider.stopWg.Add(1)
defer ai.provider.stopWg.Done()
@@ -656,7 +661,7 @@ func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Cl
return err
}
- nodetokenbytes, err := sess.Output("cat /home/crunch/node-token")
+ nodetokenbytes, err := sess.Output("cat /home/" + ai.provider.azconfig.AdminUsername + "/node-token")
if err != nil {
return err
}
diff --git a/lib/cloud/azure_test.go b/lib/cloud/azure_test.go
index f74688bb1..0b8aa8431 100644
--- a/lib/cloud/azure_test.go
+++ b/lib/cloud/azure_test.go
@@ -23,6 +23,7 @@
// image: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
// delete_dangling_resources_after: 20
// authorized_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example at example"
+// AdminUsername: crunch
package cloud
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
index 969a4bc2d..3fb776e09 100644
--- a/lib/cloud/interfaces.go
+++ b/lib/cloud/interfaces.go
@@ -62,6 +62,9 @@ type ExecutorTarget interface {
// unknown while instance is booting.
Address() string
+ // Remote username to send during SSH authentication.
+ RemoteUser() string
+
// Return nil if the given public key matches the instance's
// SSH server key. If the provided Dialer is not nil,
// VerifyHostKey can use it to make outgoing network
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
index d0fb54c54..646e90f31 100644
--- a/lib/dispatchcloud/ssh_executor/executor.go
+++ b/lib/dispatchcloud/ssh_executor/executor.go
@@ -38,6 +38,7 @@ func New(t cloud.ExecutorTarget) *Executor {
type Executor struct {
target cloud.ExecutorTarget
targetPort string
+ targetUser string
signers []ssh.Signer
mtx sync.RWMutex // controls access to instance after creation
@@ -189,7 +190,7 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
}
var receivedKey ssh.PublicKey
client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
- User: "root",
+ User: target.RemoteUser(),
Auth: []ssh.AuthMethod{
ssh.PublicKeys(exr.signers...),
},
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
index f8565b4a7..e7c023586 100644
--- a/lib/dispatchcloud/ssh_executor/executor_test.go
+++ b/lib/dispatchcloud/ssh_executor/executor_test.go
@@ -73,6 +73,7 @@ func (s *ExecutorSuite) TestBadHostKey(c *check.C) {
return 0
},
HostKey: hostpriv,
+ AuthorizedUser: "username",
AuthorizedKeys: []ssh.PublicKey{clientpub},
},
}
@@ -121,6 +122,7 @@ func (s *ExecutorSuite) TestExecute(c *check.C) {
return uint32(exitcode)
},
HostKey: hostpriv,
+ AuthorizedUser: "username",
AuthorizedKeys: []ssh.PublicKey{clientpub},
},
}
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
index ed5995f4c..b81aa6aad 100644
--- a/lib/dispatchcloud/test/ssh_service.go
+++ b/lib/dispatchcloud/test/ssh_service.go
@@ -39,6 +39,7 @@ type SSHExecFunc func(env map[string]string, command string, stdin io.Reader, st
type SSHService struct {
Exec SSHExecFunc
HostKey ssh.Signer
+ AuthorizedUser string
AuthorizedKeys []ssh.PublicKey
listener net.Listener
@@ -64,6 +65,11 @@ func (ss *SSHService) Address() string {
return ln.Addr().String()
}
+// User returns the username that will be accepted.
+func (ss *SSHService) User() string {
+ return ss.AuthorizedUser
+}
+
// Close shuts down the server and releases resources. Established
// connections are unaffected.
func (ss *SSHService) Close() {
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index 209d92121..e4f16b41a 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -121,6 +121,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
}
svm.SSHService = SSHService{
HostKey: sis.driver.HostKey,
+ AuthorizedUser: "root",
AuthorizedKeys: ak,
Exec: svm.Exec,
}
@@ -327,6 +328,10 @@ func (si stubInstance) Address() string {
return si.addr
}
+func (si stubInstance) RemoteUser() string {
+ return si.svm.SSHService.AuthorizedUser
+}
+
func (si stubInstance) Destroy() error {
sis := si.svm.sis
if sis.driver.HoldCloudOps {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index e6b506298..df37326a0 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -485,7 +485,11 @@ func (wp *Pool) kill(wkr *worker, uuid string) {
"Instance": wkr.instance,
})
logger.Debug("killing process")
- stdout, stderr, err := wkr.executor.Execute(nil, "crunch-run --kill 15 "+uuid, nil)
+ cmd := "crunch-run --kill 15 " + uuid
+ if u := wkr.instance.RemoteUser(); u != "root" {
+ cmd = "sudo " + cmd
+ }
+ stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
logger.WithFields(logrus.Fields{
"stderr": string(stderr),
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index d0810f7a8..fbb6981ca 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -101,10 +101,14 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
wkr.starting[ctr.UUID] = struct{}{}
wkr.state = StateRunning
go func() {
+ cmd := "crunch-run --detach '" + ctr.UUID + "'"
stdin := bytes.NewBufferString(fmt.Sprintf("export %s=%q\nexport %s=%q\n",
"ARVADOS_API_HOST", wkr.wp.arvClient.APIHost,
"ARVADOS_API_TOKEN", wkr.wp.arvClient.AuthToken))
- cmd := "source /dev/stdin; crunch-run --detach '" + ctr.UUID + "'"
+ if u := wkr.instance.RemoteUser(); u != "root" {
+ cmd = "sudo -E " + cmd
+ }
+ cmd = "source /dev/stdin; " + cmd
stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
@@ -325,6 +329,9 @@ func (wkr *worker) probeAndUpdate() {
func (wkr *worker) probeRunning() (running []string, ok bool) {
cmd := "crunch-run --list"
+ if u := wkr.instance.RemoteUser(); u != "root" {
+ cmd = "sudo " + cmd
+ }
stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
wkr.logger.WithFields(logrus.Fields{
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list