[ARVADOS] created: 2.1.0-2454-gef973cee1
Git user
git at public.arvados.org
Wed May 11 17:05:40 UTC 2022
at ef973cee1f9266510de1574c0424102242bc6dba (commit)
commit ef973cee1f9266510de1574c0424102242bc6dba
Author: Tom Clegg <tom at curii.com>
Date: Wed May 4 14:52:22 2022 -0400
15370: Add dispatchcloud loopback driver.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/boot/passenger.go b/lib/boot/passenger.go
index 410befab9..f86f1f930 100644
--- a/lib/boot/passenger.go
+++ b/lib/boot/passenger.go
@@ -28,8 +28,8 @@ var railsEnv = []string{
// Install a Rails application's dependencies, including phusion
// passenger.
type installPassenger struct {
- src string
- varlibdir string
+ src string // path to app in source tree
+ varlibdir string // path to app (relative to /var/lib/arvados) in OS package: "railsapi" or "workbench1"
depends []supervisedTask
}
@@ -52,6 +52,11 @@ func (runner installPassenger) Run(ctx context.Context, fail func(error), super
appdir := runner.src
if super.ClusterType == "test" {
+ // In the multi-cluster test setup, if we run multiple
+ // Rails instances directly from the source tree, they
+ // step on one another's files in {source}/tmp, log,
+ // etc. So instead we copy the source directory into a
+ // temp dir and run the Rails app from there.
appdir = filepath.Join(super.tempdir, runner.varlibdir)
err = super.RunProgram(ctx, super.tempdir, runOptions{}, "mkdir", "-p", appdir)
if err != nil {
@@ -112,7 +117,7 @@ func (runner installPassenger) Run(ctx context.Context, fail func(error), super
type runPassenger struct {
src string // path to app in source tree
- varlibdir string // path to app (relative to /var/lib/arvados) in OS package
+ varlibdir string // path to app (relative to /var/lib/arvados) in OS package: "railsapi" or "workbench1"
svc arvados.Service
depends []supervisedTask
}
diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go
index 5ec1b2244..d69cc9f18 100644
--- a/lib/boot/supervisor.go
+++ b/lib/boot/supervisor.go
@@ -387,10 +387,14 @@ func (super *Supervisor) runCluster() error {
}
if super.ClusterType != "test" {
tasks = append(tasks,
- runServiceCommand{name: "dispatch-cloud", svc: super.cluster.Services.DispatchCloud},
runServiceCommand{name: "keep-balance", svc: super.cluster.Services.Keepbalance},
)
}
+ if super.cluster.Containers.CloudVMs.Enable {
+ tasks = append(tasks,
+ runServiceCommand{name: "dispatch-cloud", svc: super.cluster.Services.DispatchCloud},
+ )
+ }
super.tasksReady = map[string]chan bool{}
for _, task := range tasks {
super.tasksReady[task.String()] = make(chan bool)
@@ -824,9 +828,6 @@ func (super *Supervisor) autofillConfig() error {
&super.cluster.Services.Workbench1,
&super.cluster.Services.Workbench2,
} {
- if svc == &super.cluster.Services.DispatchCloud && super.ClusterType == "test" {
- continue
- }
if svc.ExternalURL.Host == "" {
port, err := nextPort(defaultExtHost)
if err != nil {
diff --git a/lib/cloud/loopback/loopback.go b/lib/cloud/loopback/loopback.go
new file mode 100644
index 000000000..6ad4f876d
--- /dev/null
+++ b/lib/cloud/loopback/loopback.go
@@ -0,0 +1,164 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package loopback
+
+import (
+ "bytes"
+ "crypto/rand"
+ "crypto/rsa"
+ "encoding/json"
+ "errors"
+ "io"
+ "os/exec"
+ "os/user"
+ "strings"
+ "sync"
+ "syscall"
+
+ "git.arvados.org/arvados.git/lib/cloud"
+ "git.arvados.org/arvados.git/lib/dispatchcloud/test"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/crypto/ssh"
+)
+
+// Driver is the loopback implementation of the cloud.Driver interface.
+var Driver = cloud.DriverFunc(newInstanceSet)
+
+var (
+ errUnimplemented = errors.New("function not implemented by loopback driver")
+ errQuota = quotaError("loopback driver is always at quota")
+)
+
+type quotaError string
+
+func (e quotaError) IsQuotaError() bool { return true }
+func (e quotaError) Error() string { return string(e) }
+
+type instanceSet struct {
+ instanceSetID cloud.InstanceSetID
+ logger logrus.FieldLogger
+ instances []*instance
+ mtx sync.Mutex
+}
+
+func newInstanceSet(config json.RawMessage, instanceSetID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+ is := &instanceSet{
+ instanceSetID: instanceSetID,
+ logger: logger,
+ }
+ return is, nil
+}
+
+func (is *instanceSet) Create(it arvados.InstanceType, _ cloud.ImageID, tags cloud.InstanceTags, _ cloud.InitCommand, pubkey ssh.PublicKey) (cloud.Instance, error) {
+ is.mtx.Lock()
+ defer is.mtx.Unlock()
+ if len(is.instances) > 0 {
+ return nil, errQuota
+ }
+ u, err := user.Current()
+ if err != nil {
+ return nil, err
+ }
+ hostRSAKey, err := rsa.GenerateKey(rand.Reader, 1024)
+ if err != nil {
+ return nil, err
+ }
+ hostKey, err := ssh.NewSignerFromKey(hostRSAKey)
+ if err != nil {
+ return nil, err
+ }
+ hostPubKey, err := ssh.NewPublicKey(hostRSAKey.Public())
+ if err != nil {
+ return nil, err
+ }
+ inst := &instance{
+ is: is,
+ instanceType: it,
+ adminUser: u.Username,
+ tags: tags,
+ hostPubKey: hostPubKey,
+ sshService: test.SSHService{
+ HostKey: hostKey,
+ AuthorizedUser: u.Username,
+ AuthorizedKeys: []ssh.PublicKey{pubkey},
+ },
+ }
+ inst.sshService.Exec = inst.sshExecFunc
+ go inst.sshService.Start()
+ is.instances = []*instance{inst}
+ return inst, nil
+}
+
+func (is *instanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
+ is.mtx.Lock()
+ defer is.mtx.Unlock()
+ var ret []cloud.Instance
+ for _, inst := range is.instances {
+ ret = append(ret, inst)
+ }
+ return ret, nil
+}
+
+func (is *instanceSet) Stop() {
+ is.mtx.Lock()
+ defer is.mtx.Unlock()
+ for _, inst := range is.instances {
+ inst.sshService.Close()
+ }
+}
+
+type instance struct {
+ is *instanceSet
+ instanceType arvados.InstanceType
+ adminUser string
+ tags cloud.InstanceTags
+ hostPubKey ssh.PublicKey
+ sshService test.SSHService
+}
+
+func (i *instance) ID() cloud.InstanceID { return cloud.InstanceID(i.instanceType.ProviderType) }
+func (i *instance) String() string { return i.instanceType.ProviderType }
+func (i *instance) ProviderType() string { return i.instanceType.ProviderType }
+func (i *instance) Address() string { return i.sshService.Address() }
+func (i *instance) RemoteUser() string { return i.adminUser }
+func (i *instance) Tags() cloud.InstanceTags { return i.tags }
+func (i *instance) SetTags(tags cloud.InstanceTags) error {
+ i.tags = tags
+ return nil
+}
+func (i *instance) Destroy() error {
+ i.is.mtx.Lock()
+ defer i.is.mtx.Unlock()
+ i.is.instances = i.is.instances[:0]
+ return nil
+}
+func (i *instance) VerifyHostKey(pubkey ssh.PublicKey, _ *ssh.Client) error {
+ if !bytes.Equal(pubkey.Marshal(), i.hostPubKey.Marshal()) {
+ return errors.New("host key mismatch")
+ }
+ return nil
+}
+func (i *instance) sshExecFunc(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+ cmd := exec.Command("sh", "-c", strings.TrimPrefix(command, "sudo "))
+ cmd.Stdin = stdin
+ cmd.Stdout = stdout
+ cmd.Stderr = stderr
+ for k, v := range env {
+ cmd.Env = append(cmd.Env, k+"="+v)
+ }
+ // Prevent child process from using our tty.
+ cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
+ err := cmd.Run()
+ if err == nil {
+ return 0
+ } else if err, ok := err.(*exec.ExitError); !ok {
+ return 1
+ } else if code := err.ExitCode(); code < 0 {
+ return 1
+ } else {
+ return uint32(code)
+ }
+}
diff --git a/lib/cloud/loopback/loopback_test.go b/lib/cloud/loopback/loopback_test.go
new file mode 100644
index 000000000..5c30f5f0e
--- /dev/null
+++ b/lib/cloud/loopback/loopback_test.go
@@ -0,0 +1,127 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package loopback
+
+import (
+ "crypto/rand"
+ "crypto/rsa"
+ "encoding/json"
+ "testing"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/cloud"
+ "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "golang.org/x/crypto/ssh"
+ check "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+ check.TestingT(t)
+}
+
+type suite struct{}
+
+var _ = check.Suite(&suite{})
+
+func (*suite) TestCreateListExecDestroy(c *check.C) {
+ logger := ctxlog.TestLogger(c)
+ is, err := Driver.InstanceSet(json.RawMessage("{}"), "testInstanceSetID", cloud.SharedResourceTags{"sharedTag": "sharedTagValue"}, logger)
+ c.Assert(err, check.IsNil)
+
+ clientRSAKey, err := rsa.GenerateKey(rand.Reader, 1024)
+ c.Assert(err, check.IsNil)
+ clientSSHKey, err := ssh.NewSignerFromKey(clientRSAKey)
+ c.Assert(err, check.IsNil)
+ clientSSHPubKey, err := ssh.NewPublicKey(clientRSAKey.Public())
+ c.Assert(err, check.IsNil)
+
+ it := arvados.InstanceType{
+ Name: "localhost",
+ ProviderType: "localhost",
+ RAM: 1002003004,
+ VCPUs: 5,
+ }
+
+ // First call to Create should succeed, and the returned
+ // instance's SSH target address should be available in << 1s.
+ inst, err := is.Create(it, "testImageID", cloud.InstanceTags{"instanceTag": "instanceTagValue"}, "testInitCommand", clientSSHPubKey)
+ c.Assert(err, check.IsNil)
+ for deadline := time.Now().Add(time.Second); inst.Address() == ""; time.Sleep(time.Second / 100) {
+ if deadline.Before(time.Now()) {
+ c.Fatal("timed out")
+ }
+ }
+
+ // Another call to Create should fail with a quota error.
+ inst2, err := is.Create(it, "testImageID", cloud.InstanceTags{"instanceTag": "instanceTagValue"}, "testInitCommand", clientSSHPubKey)
+ c.Check(inst2, check.IsNil)
+ qerr, ok := err.(cloud.QuotaError)
+ if c.Check(ok, check.Equals, true, check.Commentf("expect cloud.QuotaError, got %#v", err)) {
+ c.Check(qerr.IsQuotaError(), check.Equals, true)
+ }
+
+ // Instance list should now have one entry, for the new
+ // instance.
+ list, err := is.Instances(nil)
+ c.Assert(err, check.IsNil)
+ c.Assert(list, check.HasLen, 1)
+ inst = list[0]
+ c.Check(inst.String(), check.Equals, "localhost")
+
+ // Instance's SSH server should execute shell commands.
+ exr := sshexecutor.New(inst)
+ exr.SetSigners(clientSSHKey)
+
+ stdout, stderr, err := exr.Execute(nil, "echo ok", nil)
+ c.Check(err, check.IsNil)
+ c.Check(string(stdout), check.Equals, "ok\n")
+ c.Check(string(stderr), check.Equals, "")
+
+ // SSH server should propagate stderr and non-zero exit
+ // status.
+ stdout, stderr, err = exr.Execute(nil, "echo fail && echo -n fail2 >&2 && false", nil)
+ c.Check(err, check.FitsTypeOf, &ssh.ExitError{})
+ c.Check(string(stdout), check.Equals, "fail\n")
+ c.Check(string(stderr), check.Equals, "fail2")
+
+ // SSH server should strip "sudo" from the front of the
+ // command.
+ withoutsudo, _, err := exr.Execute(nil, "whoami", nil)
+ c.Check(err, check.IsNil)
+ withsudo, _, err := exr.Execute(nil, "sudo whoami", nil)
+ c.Check(err, check.IsNil)
+ c.Check(string(withsudo), check.Equals, string(withoutsudo))
+
+ // SSH server should reject keys other than the one whose
+ // public key we passed to Create.
+ badRSAKey, err := rsa.GenerateKey(rand.Reader, 1024)
+ c.Assert(err, check.IsNil)
+ badSSHKey, err := ssh.NewSignerFromKey(badRSAKey)
+ c.Assert(err, check.IsNil)
+ // Create a new executor here, otherwise Execute would reuse
+ // the existing connection instead of authenticating with
+ // badRSAKey.
+ exr = sshexecutor.New(inst)
+ exr.SetSigners(badSSHKey)
+ stdout, stderr, err = exr.Execute(nil, "true", nil)
+ c.Check(err, check.ErrorMatches, `.*unable to authenticate.*`)
+
+ // Destroying the instance causes it to disappear from the
+ // list, and allows us to create one more.
+ err = inst.Destroy()
+ c.Check(err, check.IsNil)
+ list, err = is.Instances(nil)
+ c.Assert(err, check.IsNil)
+ c.Assert(list, check.HasLen, 0)
+ _, err = is.Create(it, "testImageID", cloud.InstanceTags{"instanceTag": "instanceTagValue"}, "testInitCommand", clientSSHPubKey)
+ c.Check(err, check.IsNil)
+ _, err = is.Create(it, "testImageID", cloud.InstanceTags{"instanceTag": "instanceTagValue"}, "testInitCommand", clientSSHPubKey)
+ c.Check(err, check.NotNil)
+ list, err = is.Instances(nil)
+ c.Assert(err, check.IsNil)
+ c.Assert(list, check.HasLen, 1)
+}
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index e60880c21..b09c1ecb9 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1278,7 +1278,9 @@ Clusters:
# need to be detected and cleaned up manually.
TagKeyPrefix: Arvados
- # Cloud driver: "azure" (Microsoft Azure) or "ec2" (Amazon AWS).
+ # Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS),
+ # or "loopback" (run containers on dispatch host for testing
+ # purposes).
Driver: ec2
# Cloud-specific driver parameters.
diff --git a/lib/config/load.go b/lib/config/load.go
index 8f8ab2bf2..abb3a804b 100644
--- a/lib/config/load.go
+++ b/lib/config/load.go
@@ -16,6 +16,7 @@ import (
"io/ioutil"
"os"
"regexp"
+ "runtime"
"strconv"
"strings"
"time"
@@ -25,6 +26,7 @@ import (
"github.com/imdario/mergo"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
)
//go:embed config.default.yml
@@ -297,7 +299,10 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
ldr.loadOldKeepBalanceConfig,
)
}
- loadFuncs = append(loadFuncs, ldr.setImplicitStorageClasses)
+ loadFuncs = append(loadFuncs,
+ ldr.setImplicitStorageClasses,
+ ldr.setLoopbackInstanceType,
+ )
for _, f := range loadFuncs {
err = f(&cfg)
if err != nil {
@@ -414,6 +419,67 @@ func (ldr *Loader) checkEnum(label, value string, accepted ...string) error {
return fmt.Errorf("%s: unacceptable value %q: must be one of %q", label, value, accepted)
}
+func (ldr *Loader) setLoopbackInstanceType(cfg *arvados.Config) error {
+ for id, cc := range cfg.Clusters {
+ if !cc.Containers.CloudVMs.Enable || cc.Containers.CloudVMs.Driver != "loopback" {
+ continue
+ }
+ if len(cc.InstanceTypes) == 1 {
+ continue
+ }
+ if len(cc.InstanceTypes) > 1 {
+ return fmt.Errorf("Clusters.%s.InstanceTypes: cannot use multiple InstanceTypes with loopback driver", id)
+ }
+ // No InstanceTypes configured. Fill in implicit
+ // default.
+ hostram, err := getHostRAM()
+ if err != nil {
+ return err
+ }
+ scratch, err := getFilesystemSize(os.TempDir())
+ if err != nil {
+ return err
+ }
+ cc.InstanceTypes = arvados.InstanceTypeMap{"localhost": {
+ Name: "localhost",
+ ProviderType: "localhost",
+ VCPUs: runtime.NumCPU(),
+ RAM: hostram,
+ Scratch: scratch,
+ IncludedScratch: scratch,
+ }}
+ cfg.Clusters[id] = cc
+ }
+ return nil
+}
+
+func getFilesystemSize(path string) (arvados.ByteSize, error) {
+ var stat unix.Statfs_t
+ err := unix.Statfs(path, &stat)
+ if err != nil {
+ return 0, err
+ }
+ return arvados.ByteSize(stat.Blocks * uint64(stat.Bsize)), nil
+}
+
+var reMemTotal = regexp.MustCompile(`(^|\n)MemTotal: *(\d+) kB\n`)
+
+func getHostRAM() (arvados.ByteSize, error) {
+ buf, err := os.ReadFile("/proc/meminfo")
+ if err != nil {
+ return 0, err
+ }
+ m := reMemTotal.FindSubmatch(buf)
+ if m == nil {
+ return 0, errors.New("error parsing /proc/meminfo: no MemTotal")
+ }
+ kb, err := strconv.ParseInt(string(m[2]), 10, 64)
+ if err != nil {
+ return 0, fmt.Errorf("error parsing /proc/meminfo: %q: %w", m[2], err)
+ }
+ return arvados.ByteSize(kb) * 1024, nil
+}
+
func (ldr *Loader) setImplicitStorageClasses(cfg *arvados.Config) error {
cluster:
for id, cc := range cfg.Clusters {
diff --git a/lib/config/load_test.go b/lib/config/load_test.go
index abf321705..f4f578a87 100644
--- a/lib/config/load_test.go
+++ b/lib/config/load_test.go
@@ -13,6 +13,7 @@ import (
"os/exec"
"reflect"
"regexp"
+ "runtime"
"strings"
"testing"
"time"
@@ -597,6 +598,55 @@ func (s *LoadSuite) TestListKeys(c *check.C) {
}
}
+func (s *LoadSuite) TestLoopbackInstanceTypes(c *check.C) {
+ ldr := testLoader(c, `
+Clusters:
+ z1111:
+ Containers:
+ CloudVMs:
+ Enable: true
+ Driver: loopback
+ InstanceTypes:
+ a: {}
+ b: {}
+`, nil)
+ cfg, err := ldr.Load()
+ c.Check(err, check.ErrorMatches, `Clusters\.z1111\.InstanceTypes: cannot use multiple InstanceTypes with loopback driver`)
+
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ Containers:
+ CloudVMs:
+ Enable: true
+ Driver: loopback
+`, nil)
+ cfg, err = ldr.Load()
+ c.Assert(err, check.IsNil)
+ cc, err := cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ c.Check(cc.InstanceTypes, check.HasLen, 1)
+ c.Check(cc.InstanceTypes["localhost"].VCPUs, check.Equals, runtime.NumCPU())
+
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ Containers:
+ CloudVMs:
+ Enable: true
+ Driver: loopback
+ InstanceTypes:
+ a:
+ VCPUs: 9
+`, nil)
+ cfg, err = ldr.Load()
+ c.Assert(err, check.IsNil)
+ cc, err = cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ c.Check(cc.InstanceTypes, check.HasLen, 1)
+ c.Check(cc.InstanceTypes["a"].VCPUs, check.Equals, 9)
+}
+
func (s *LoadSuite) TestImplicitStorageClasses(c *check.C) {
// If StorageClasses and Volumes.*.StorageClasses are all
// empty, there is a default storage class named "default".
@@ -820,3 +870,16 @@ arvados_config_source_timestamp_seconds{sha256="83aea5d82eb1d53372cd65c936c60acc
`)
}
}
+
+func (s *LoadSuite) TestGetHostRAM(c *check.C) {
+ hostram, err := getHostRAM()
+ c.Check(err, check.IsNil)
+ c.Logf("getHostRAM() == %v", hostram)
+}
+
+func (s *LoadSuite) TestGetFilesystemSize(c *check.C) {
+ path := c.MkDir()
+ size, err := getFilesystemSize(path)
+ c.Check(err, check.IsNil)
+ c.Logf("getFilesystemSize(%q) == %v", path, size)
+}
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 44be17c77..d65bb3149 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"io"
+ "io/fs"
"io/ioutil"
"math"
"net"
@@ -71,6 +72,17 @@ func (s *IntegrationSuite) SetUpSuite(c *check.C) {
Insecure: true
SystemLogs:
Format: text
+ Containers:
+ CloudVMs:
+ Enable: true
+ Driver: loopback
+ BootProbeCommand: id
+ ProbeInterval: 1s
+ PollInterval: 5s
+ SyncInterval: 10s
+ TimeoutIdle: 1s
+ TimeoutBooting: 2s
+ RuntimeEngine: singularity
RemoteClusters:
z1111:
Host: ` + hostport["z1111"] + `
@@ -1111,3 +1123,96 @@ func (s *IntegrationSuite) TestForwardRuntimeTokenToLoginCluster(c *check.C) {
c.Check(err, check.ErrorMatches, `request failed: .* 401 Unauthorized: cannot use a locally issued token to forward a request to our login cluster \(z1111\)`)
c.Check(err, check.Not(check.ErrorMatches), `(?ms).*127\.0\.0\.11.*`)
}
+
+func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
+ conn1 := s.super.Conn("z1111")
+ rootctx1, _, _ := s.super.RootClients("z1111")
+ _, ac1, kc1, _ := s.super.UserClients("z1111", rootctx1, c, conn1, s.oidcprovider.AuthEmail, true)
+
+ c.Log("[docker load]")
+ out, err := exec.Command("docker", "load", "--input", arvadostest.BusyboxDockerImage(c)).CombinedOutput()
+ c.Logf("[docker load done] %s", out)
+ c.Check(err, check.IsNil)
+
+ c.Log("[arv-keepdocker]")
+ akd := exec.Command("arv-keepdocker", "--no-resume", "busybox:uclibc")
+ akd.Env = append(os.Environ(), "ARVADOS_API_HOST="+ac1.APIHost, "ARVADOS_API_HOST_INSECURE=1", "ARVADOS_API_TOKEN="+ac1.AuthToken)
+ c.Logf("[arv-keepdocker env] %q", akd.Env)
+ out, err = akd.CombinedOutput()
+ c.Logf("[arv-keepdocker done] %s", out)
+ c.Check(err, check.IsNil)
+
+ var cr arvados.ContainerRequest
+ err = ac1.RequestAndDecode(&cr, "POST", "/arvados/v1/container_requests", nil, map[string]interface{}{
+ "container_request": map[string]interface{}{
+ "command": []string{"touch", "/out/hello_world"},
+ "container_image": "busybox:uclibc",
+ "cwd": "/tmp",
+ "environment": map[string]string{},
+ "mounts": map[string]arvados.Mount{"/out": {Kind: "tmp", Capacity: 10000}},
+ "output_path": "/out",
+ "runtime_constraints": arvados.RuntimeConstraints{RAM: 100000000, VCPUs: 1},
+ "priority": 1,
+ "state": arvados.ContainerRequestStateCommitted,
+ },
+ })
+ c.Assert(err, check.IsNil)
+
+ showlogs := func(collectionID string) {
+ var logcoll arvados.Collection
+ err = ac1.RequestAndDecode(&logcoll, "GET", "/arvados/v1/collections/"+collectionID, nil, nil)
+ c.Assert(err, check.IsNil)
+ cfs, err := logcoll.FileSystem(ac1, kc1)
+ c.Assert(err, check.IsNil)
+ fs.WalkDir(arvados.FS(cfs), "/", func(path string, d fs.DirEntry, err error) error {
+ if d.IsDir() || strings.HasPrefix(path, "/log for container") {
+ return nil
+ }
+ f, err := cfs.Open(path)
+ c.Assert(err, check.IsNil)
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Assert(err, check.IsNil)
+ c.Logf("=== %s\n%s\n", path, buf)
+ return nil
+ })
+ }
+
+ var ctr arvados.Container
+ var lastState arvados.ContainerState
+ deadline := time.Now().Add(time.Minute)
+wait:
+ for ; ; lastState = ctr.State {
+ err = ac1.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+ c.Assert(err, check.IsNil)
+ switch ctr.State {
+ case lastState:
+ if time.Now().After(deadline) {
+ c.Errorf("timed out, container request state is %q", cr.State)
+ showlogs(ctr.Log)
+ c.FailNow()
+ }
+ time.Sleep(time.Second / 2)
+ case arvados.ContainerStateComplete:
+ break wait
+ case arvados.ContainerStateQueued, arvados.ContainerStateLocked, arvados.ContainerStateRunning:
+ c.Logf("container state changed to %q", ctr.State)
+ default:
+ c.Errorf("unexpected container state %q", ctr.State)
+ showlogs(ctr.Log)
+ c.FailNow()
+ }
+ }
+ c.Check(ctr.ExitCode, check.Equals, 0)
+
+ err = ac1.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
+ c.Assert(err, check.IsNil)
+
+ showlogs(cr.LogUUID)
+
+ var outcoll arvados.Collection
+ err = ac1.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
+ c.Assert(err, check.IsNil)
+ c.Check(outcoll.ManifestText, check.Matches, `\. d41d8.* 0:0:hello_world\n`)
+ c.Check(outcoll.PortableDataHash, check.Equals, "dac08d558cfb6c9536f604ca89e3c002+53")
+}
diff --git a/lib/crunchrun/executor_test.go b/lib/crunchrun/executor_test.go
index 99af0530f..1833fc8ac 100644
--- a/lib/crunchrun/executor_test.go
+++ b/lib/crunchrun/executor_test.go
@@ -7,43 +7,15 @@ package crunchrun
import (
"bytes"
"io"
- "io/ioutil"
- "net/http"
- "os"
"strings"
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
"golang.org/x/net/context"
. "gopkg.in/check.v1"
)
-func busyboxDockerImage(c *C) string {
- fnm := "busybox_uclibc.tar"
- cachedir := c.MkDir()
- cachefile := cachedir + "/" + fnm
- if _, err := os.Stat(cachefile); err == nil {
- return cachefile
- }
-
- f, err := ioutil.TempFile(cachedir, "")
- c.Assert(err, IsNil)
- defer f.Close()
- defer os.Remove(f.Name())
-
- resp, err := http.Get("https://cache.arvados.org/" + fnm)
- c.Assert(err, IsNil)
- defer resp.Body.Close()
- _, err = io.Copy(f, resp.Body)
- c.Assert(err, IsNil)
- err = f.Close()
- c.Assert(err, IsNil)
- err = os.Rename(f.Name(), cachefile)
- c.Assert(err, IsNil)
-
- return cachefile
-}
-
type nopWriteCloser struct{ io.Writer }
func (nopWriteCloser) Close() error { return nil }
@@ -71,7 +43,7 @@ func (s *executorSuite) SetUpTest(c *C) {
Stdout: nopWriteCloser{&s.stdout},
Stderr: nopWriteCloser{&s.stderr},
}
- err := s.executor.LoadImage("", busyboxDockerImage(c), arvados.Container{}, "", nil)
+ err := s.executor.LoadImage("", arvadostest.BusyboxDockerImage(c), arvados.Container{}, "", nil)
c.Assert(err, IsNil)
}
diff --git a/lib/dispatchcloud/driver.go b/lib/dispatchcloud/driver.go
index 5fcc0903f..93515defb 100644
--- a/lib/dispatchcloud/driver.go
+++ b/lib/dispatchcloud/driver.go
@@ -11,6 +11,7 @@ import (
"git.arvados.org/arvados.git/lib/cloud"
"git.arvados.org/arvados.git/lib/cloud/azure"
"git.arvados.org/arvados.git/lib/cloud/ec2"
+ "git.arvados.org/arvados.git/lib/cloud/loopback"
"git.arvados.org/arvados.git/sdk/go/arvados"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
@@ -21,8 +22,9 @@ import (
// Clusters.*.Containers.CloudVMs.Driver configuration values
// correspond to keys in this map.
var Drivers = map[string]cloud.Driver{
- "azure": azure.Driver,
- "ec2": ec2.Driver,
+ "azure": azure.Driver,
+ "ec2": ec2.Driver,
+ "loopback": loopback.Driver,
}
func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 37e3fa988..66e0bfee9 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -13,6 +13,7 @@ import (
"fmt"
"io"
"io/ioutil"
+ mathrand "math/rand"
"sort"
"strings"
"sync"
@@ -774,6 +775,13 @@ func (wp *Pool) runProbes() {
workers := []cloud.InstanceID{}
for range probeticker.C {
+ // Add some jitter. Without this, if probeInterval is
+ // a multiple of syncInterval and sync is
+ // instantaneous (as with the loopback driver), the
+ // first few probes race with sync operations and
+ // don't update the workers.
+ time.Sleep(time.Duration(mathrand.Int63n(int64(wp.probeInterval) / 23)))
+
workers = workers[:0]
wp.mtx.Lock()
for id, wkr := range wp.workers {
@@ -900,7 +908,7 @@ func (wp *Pool) loadRunnerData() error {
}
wp.runnerData = buf
wp.runnerMD5 = md5.Sum(buf)
- wp.runnerCmd = fmt.Sprintf("/var/lib/arvados/crunch-run~%x", wp.runnerMD5)
+ wp.runnerCmd = fmt.Sprintf("/tmp/arvados-crunch-run/crunch-run~%x", wp.runnerMD5)
return nil
}
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 9e89d7daa..1c8d62c20 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -313,6 +313,10 @@ func (wkr *worker) probeAndUpdate() {
// not yet running when ctrUUIDs was generated. Leave
// wkr.running alone and wait for the next probe to
// catch up on any changes.
+ logger.WithFields(logrus.Fields{
+ "updated": updated,
+ "wkr.updated": wkr.updated,
+ }).Debug("skipping worker state update due to probe/sync race")
return
}
@@ -387,6 +391,11 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds())
return
}
+ wkr.logger.WithFields(logrus.Fields{
+ "Command": cmd,
+ "stdout": string(stdout),
+ "stderr": string(stderr),
+ }).Debug("probe succeeded")
wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds())
ok = true
diff --git a/lib/install/init.go b/lib/install/init.go
index 6a52441d7..d2fed1dd7 100644
--- a/lib/install/init.go
+++ b/lib/install/init.go
@@ -160,6 +160,9 @@ func (initcmd *initCommand) RunCommand(prog string, args []string, stdin io.Read
{{end}}
Containers:
DispatchPrivateKey: {{printf "%q" .GenerateSSHPrivateKey}}
+ CloudVMs:
+ Enable: true
+ Driver: loopback
ManagementToken: {{printf "%q" ( .RandomHex 50 )}}
PostgreSQL:
Connection:
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go
index 3510a6db0..d6d75192f 100644
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -37,6 +37,7 @@ type Container struct {
RuntimeAuthScopes []string `json:"runtime_auth_scopes"`
RuntimeToken string `json:"runtime_token"`
AuthUUID string `json:"auth_uuid"`
+ Log string `json:"log"`
}
// ContainerRequest is an arvados#container_request resource.
diff --git a/sdk/go/arvadostest/busybox_image.go b/sdk/go/arvadostest/busybox_image.go
new file mode 100644
index 000000000..0411491a6
--- /dev/null
+++ b/sdk/go/arvadostest/busybox_image.go
@@ -0,0 +1,43 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package arvadostest
+
+import (
+ "io"
+ "io/ioutil"
+ "net/http"
+ "os"
+
+ . "gopkg.in/check.v1"
+)
+
+// BusyboxDockerImage downloads the busybox:uclibc docker image
+// (busybox_uclibc.tar) from cache.arvados.org into a temporary file
+// and returns the temporary file name.
+func BusyboxDockerImage(c *C) string {
+ fnm := "busybox_uclibc.tar"
+ cachedir := c.MkDir()
+ cachefile := cachedir + "/" + fnm
+ if _, err := os.Stat(cachefile); err == nil {
+ return cachefile
+ }
+
+ f, err := ioutil.TempFile(cachedir, "")
+ c.Assert(err, IsNil)
+ defer f.Close()
+ defer os.Remove(f.Name())
+
+ resp, err := http.Get("https://cache.arvados.org/" + fnm)
+ c.Assert(err, IsNil)
+ defer resp.Body.Close()
+ _, err = io.Copy(f, resp.Body)
+ c.Assert(err, IsNil)
+ err = f.Close()
+ c.Assert(err, IsNil)
+ err = os.Rename(f.Name(), cachefile)
+ c.Assert(err, IsNil)
+
+ return cachefile
+}
commit 5209b149fee9202e4cc8763de2975a7563e86060
Author: Tom Clegg <tom at curii.com>
Date: Tue May 3 23:34:13 2022 -0400
15370: Avoid running multiple instances of rails apps from same dir.
Otherwise, cluster z1111, z2222, z3333 share a cache
directory (Rails.root+"/tmp"), and end up serving one another's cached
discovery docs.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/boot/passenger.go b/lib/boot/passenger.go
index f0cd02946..410befab9 100644
--- a/lib/boot/passenger.go
+++ b/lib/boot/passenger.go
@@ -28,8 +28,9 @@ var railsEnv = []string{
// Install a Rails application's dependencies, including phusion
// passenger.
type installPassenger struct {
- src string
- depends []supervisedTask
+ src string
+ varlibdir string
+ depends []supervisedTask
}
func (runner installPassenger) String() string {
@@ -49,33 +50,56 @@ func (runner installPassenger) Run(ctx context.Context, fail func(error), super
passengerInstallMutex.Lock()
defer passengerInstallMutex.Unlock()
+ appdir := runner.src
+ if super.ClusterType == "test" {
+ appdir = filepath.Join(super.tempdir, runner.varlibdir)
+ err = super.RunProgram(ctx, super.tempdir, runOptions{}, "mkdir", "-p", appdir)
+ if err != nil {
+ return err
+ }
+ err = super.RunProgram(ctx, filepath.Join(super.SourcePath, runner.src), runOptions{}, "rsync",
+ "-a", "--no-owner", "--no-group", "--delete-after", "--delete-excluded",
+ "--exclude", "/coverage",
+ "--exclude", "/log",
+ "--exclude", "/node_modules",
+ "--exclude", "/tmp",
+ "--exclude", "/public/assets",
+ "--exclude", "/vendor",
+ "--exclude", "/config/environments",
+ "./",
+ appdir+"/")
+ if err != nil {
+ return err
+ }
+ }
+
var buf bytes.Buffer
- err = super.RunProgram(ctx, runner.src, runOptions{output: &buf}, "gem", "list", "--details", "bundler")
+ err = super.RunProgram(ctx, appdir, runOptions{output: &buf}, "gem", "list", "--details", "bundler")
if err != nil {
return err
}
for _, version := range []string{"2.2.19"} {
if !strings.Contains(buf.String(), "("+version+")") {
- err = super.RunProgram(ctx, runner.src, runOptions{}, "gem", "install", "--user", "--conservative", "--no-document", "bundler:2.2.19")
+ err = super.RunProgram(ctx, appdir, runOptions{}, "gem", "install", "--user", "--conservative", "--no-document", "bundler:2.2.19")
if err != nil {
return err
}
break
}
}
- err = super.RunProgram(ctx, runner.src, runOptions{}, "bundle", "install", "--jobs", "4", "--path", filepath.Join(os.Getenv("HOME"), ".gem"))
+ err = super.RunProgram(ctx, appdir, runOptions{}, "bundle", "install", "--jobs", "4", "--path", filepath.Join(os.Getenv("HOME"), ".gem"))
if err != nil {
return err
}
- err = super.RunProgram(ctx, runner.src, runOptions{}, "bundle", "exec", "passenger-config", "build-native-support")
+ err = super.RunProgram(ctx, appdir, runOptions{}, "bundle", "exec", "passenger-config", "build-native-support")
if err != nil {
return err
}
- err = super.RunProgram(ctx, runner.src, runOptions{}, "bundle", "exec", "passenger-config", "install-standalone-runtime")
+ err = super.RunProgram(ctx, appdir, runOptions{}, "bundle", "exec", "passenger-config", "install-standalone-runtime")
if err != nil {
return err
}
- err = super.RunProgram(ctx, runner.src, runOptions{}, "bundle", "exec", "passenger-config", "validate-install")
+ err = super.RunProgram(ctx, appdir, runOptions{}, "bundle", "exec", "passenger-config", "validate-install")
if err != nil && !strings.Contains(err.Error(), "exit status 2") {
// Exit code 2 indicates there were warnings (like
// "other passenger installations have been detected",
@@ -107,9 +131,12 @@ func (runner runPassenger) Run(ctx context.Context, fail func(error), super *Sup
return fmt.Errorf("bug: no internalPort for %q: %v (%#v)", runner, err, runner.svc)
}
var appdir string
- if super.ClusterType == "production" {
+ switch super.ClusterType {
+ case "production":
appdir = "/var/lib/arvados/" + runner.varlibdir
- } else {
+ case "test":
+ appdir = filepath.Join(super.tempdir, runner.varlibdir)
+ default:
appdir = runner.src
}
loglevel := "4"
diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go
index a5d61b4a1..5ec1b2244 100644
--- a/lib/boot/supervisor.go
+++ b/lib/boot/supervisor.go
@@ -370,14 +370,14 @@ func (super *Supervisor) runCluster() error {
runServiceCommand{name: "keepstore", svc: super.cluster.Services.Keepstore},
runServiceCommand{name: "keep-web", svc: super.cluster.Services.WebDAV},
runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{seedDatabase{}}},
- installPassenger{src: "services/api"},
- runPassenger{src: "services/api", varlibdir: "railsapi", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, seedDatabase{}, installPassenger{src: "services/api"}}},
+ installPassenger{src: "services/api", varlibdir: "railsapi"},
+ runPassenger{src: "services/api", varlibdir: "railsapi", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, seedDatabase{}, installPassenger{src: "services/api", varlibdir: "railsapi"}}},
seedDatabase{},
}
if !super.NoWorkbench1 {
tasks = append(tasks,
- installPassenger{src: "apps/workbench", depends: []supervisedTask{seedDatabase{}}}, // dependency ensures workbench doesn't delay api install/startup
- runPassenger{src: "apps/workbench", varlibdir: "workbench1", svc: super.cluster.Services.Workbench1, depends: []supervisedTask{installPassenger{src: "apps/workbench"}}},
+ installPassenger{src: "apps/workbench", varlibdir: "workbench1", depends: []supervisedTask{seedDatabase{}}}, // dependency ensures workbench doesn't delay api install/startup
+ runPassenger{src: "apps/workbench", varlibdir: "workbench1", svc: super.cluster.Services.Workbench1, depends: []supervisedTask{installPassenger{src: "apps/workbench", varlibdir: "workbench1"}}},
)
}
if !super.NoWorkbench2 {
commit 053537576ad3a141adfba1dd58d0ac2c4d92a399
Author: Tom Clegg <tom at curii.com>
Date: Mon Apr 25 10:03:22 2022 -0400
Build smaller dev binaries with -s -w, as with packaged binaries.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/build/run-tests.sh b/build/run-tests.sh
index 0f996f77e..9f5d37322 100755
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -736,7 +736,7 @@ do_test() {
go_ldflags() {
version=${ARVADOS_VERSION:-$(git log -n1 --format=%H)-dev}
- echo "-X git.arvados.org/arvados.git/lib/cmd.version=${version} -X main.version=${version}"
+ echo "-X git.arvados.org/arvados.git/lib/cmd.version=${version} -X main.version=${version} -s -w"
}
do_test_once() {
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list