[ARVADOS] updated: 1.3.0-360-ge1e0f6789

Git user git at public.curoverse.com
Mon Feb 18 13:51:32 EST 2019


Summary of changes:
 lib/controller/cmd.go                         |   4 +-
 lib/controller/federation_test.go             |   7 +-
 lib/controller/handler_test.go                |  12 ++-
 lib/controller/server_test.go                 |  18 +----
 lib/dispatchcloud/cmd.go                      |   6 +-
 lib/dispatchcloud/container/queue.go          |  21 ++++-
 lib/dispatchcloud/dispatcher.go               |   7 +-
 lib/dispatchcloud/dispatcher_test.go          |  12 ++-
 lib/dispatchcloud/scheduler/run_queue_test.go |  11 ++-
 lib/dispatchcloud/scheduler/scheduler.go      |   6 +-
 lib/dispatchcloud/scheduler/sync.go           |  22 +++++-
 lib/dispatchcloud/test/logger.go              |  19 -----
 lib/dispatchcloud/test/stub_driver.go         |   4 +-
 lib/dispatchcloud/worker/pool.go              | 109 ++++++++++++--------------
 lib/dispatchcloud/worker/pool_test.go         |   4 +-
 lib/dispatchcloud/worker/worker.go            |   3 +-
 lib/dispatchcloud/worker/worker_test.go       |   2 +-
 lib/service/cmd.go                            |  16 ++--
 sdk/go/arvados/config.go                      |   6 ++
 sdk/go/ctxlog/log.go                          |  68 +++++++++++++---
 20 files changed, 219 insertions(+), 138 deletions(-)
 delete mode 100644 lib/dispatchcloud/test/logger.go

       via  e1e0f678978cd2e6d81cba1f61de06effa961afa (commit)
       via  87bf45c8c4db3c7dd58edabab429286dcabfcd7d (commit)
       via  91b39ff3fbba69e54a330a7949c9ed4bbb3117ff (commit)
       via  80c48b78fe5029fe3389275b9e51fe0be65cb11d (commit)
       via  abd21f165dbfd84eaea9ca1d7f20ce195c52d679 (commit)
      from  d6fbaeba4da4ca1dcd70fe48a8875a84c17214da (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit e1e0f678978cd2e6d81cba1f61de06effa961afa
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Feb 18 13:50:25 2019 -0500

    14807: Log idle time in seconds instead of nanoseconds.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index f70e20998..9be9f41f4 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -14,6 +14,7 @@ import (
 
 	"git.curoverse.com/arvados.git/lib/cloud"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/stats"
 	"github.com/sirupsen/logrus"
 )
 
@@ -416,7 +417,7 @@ func (wkr *worker) shutdownIfIdle() bool {
 
 	wkr.logger.WithFields(logrus.Fields{
 		"State":        wkr.state,
-		"Age":          age,
+		"IdleDuration": stats.Duration(age),
 		"IdleBehavior": wkr.idleBehavior,
 	}).Info("shutdown idle worker")
 	wkr.shutdown()

commit 87bf45c8c4db3c7dd58edabab429286dcabfcd7d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Feb 18 13:17:08 2019 -0500

    14807: Cancel or requeue container when priority drops to zero.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
index 47c754e24..ed78a8dae 100644
--- a/lib/dispatchcloud/scheduler/sync.go
+++ b/lib/dispatchcloud/scheduler/sync.go
@@ -50,7 +50,7 @@ func (sch *Scheduler) sync() {
 			} else if !exited.IsZero() && qUpdated.After(exited) {
 				go cancel(ent, "state=\"Running\" after crunch-run exited")
 			} else if ent.Container.Priority == 0 {
-				go kill(ent, fmt.Sprintf("priority=%d", ent.Container.Priority))
+				go kill(ent, "priority=0")
 			}
 		case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
 			if running {
@@ -82,16 +82,30 @@ func (sch *Scheduler) sync() {
 			if running && !exited.IsZero() && qUpdated.After(exited) {
 				logger := sch.logger.WithFields(logrus.Fields{
 					"ContainerUUID": uuid,
+					"State":         ent.Container.State,
+					"Priority":      ent.Container.Priority,
 					"Exited":        time.Since(exited).Seconds(),
 				})
-				logger.Infof("requeueing container because state=%q after crunch-run exited", ent.Container.State)
+				logger.Info("requeueing locked container after crunch-run exited")
+				err := sch.queue.Unlock(uuid)
+				if err != nil {
+					logger.WithError(err).Error("error requeueing container")
+				}
+			} else if running && exited.IsZero() && ent.Container.Priority == 0 {
+				go kill(ent, "priority=0")
+			} else if !running && ent.Container.Priority == 0 {
+				logger := sch.logger.WithField("ContainerUUID", uuid)
+				logger.Info("unlocking container because priority=0")
 				err := sch.queue.Unlock(uuid)
 				if err != nil {
-					logger.WithError(err).Info("error requeueing container")
+					logger.WithError(err).Error("error requeueing container")
 				}
 			}
 		default:
-			sch.logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
+			sch.logger.WithFields(logrus.Fields{
+				"ContainerUUID": uuid,
+				"State":         ent.Container.State,
+			}).Error("BUG: unexpected state")
 		}
 	}
 }

commit 91b39ff3fbba69e54a330a7949c9ed4bbb3117ff
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Feb 18 12:33:21 2019 -0500

    14807: Use context to pass a suitable logger to all service commands.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/controller/cmd.go b/lib/controller/cmd.go
index 94eb2580b..c1d4657ba 100644
--- a/lib/controller/cmd.go
+++ b/lib/controller/cmd.go
@@ -5,6 +5,8 @@
 package controller
 
 import (
+	"context"
+
 	"git.curoverse.com/arvados.git/lib/cmd"
 	"git.curoverse.com/arvados.git/lib/service"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -12,6 +14,6 @@ import (
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameController, newHandler)
 
-func newHandler(cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler {
+func newHandler(_ context.Context, cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler {
 	return &Handler{Cluster: cluster, NodeProfile: np}
 }
diff --git a/lib/controller/federation_test.go b/lib/controller/federation_test.go
index c935e20be..79b534dc8 100644
--- a/lib/controller/federation_test.go
+++ b/lib/controller/federation_test.go
@@ -19,6 +19,7 @@ import (
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 	"github.com/sirupsen/logrus"
@@ -29,7 +30,7 @@ import (
 var _ = check.Suite(&FederationSuite{})
 
 type FederationSuite struct {
-	log *logrus.Logger
+	log logrus.FieldLogger
 	// testServer and testHandler are the controller being tested,
 	// "zhome".
 	testServer  *httpserver.Server
@@ -44,9 +45,7 @@ type FederationSuite struct {
 }
 
 func (s *FederationSuite) SetUpTest(c *check.C) {
-	s.log = logrus.New()
-	s.log.Formatter = &logrus.JSONFormatter{}
-	s.log.Out = &logWriter{c.Log}
+	s.log = ctxlog.TestLogger(c)
 
 	s.remoteServer = newServerFromIntegrationTestEnv(c)
 	c.Assert(s.remoteServer.Start(), check.IsNil)
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index f11228a31..dfe60d90a 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -5,6 +5,7 @@
 package controller
 
 import (
+	"context"
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
@@ -16,6 +17,7 @@ import (
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 	check "gopkg.in/check.v1"
 )
@@ -30,9 +32,13 @@ var _ = check.Suite(&HandlerSuite{})
 type HandlerSuite struct {
 	cluster *arvados.Cluster
 	handler http.Handler
+	ctx     context.Context
+	cancel  context.CancelFunc
 }
 
 func (s *HandlerSuite) SetUpTest(c *check.C) {
+	s.ctx, s.cancel = context.WithCancel(context.Background())
+	s.ctx = ctxlog.Context(s.ctx, ctxlog.New(os.Stderr, "json", "debug"))
 	s.cluster = &arvados.Cluster{
 		ClusterID:  "zzzzz",
 		PostgreSQL: integrationTestCluster().PostgreSQL,
@@ -44,7 +50,11 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
 		},
 	}
 	node := s.cluster.NodeProfiles["*"]
-	s.handler = newHandler(s.cluster, &node)
+	s.handler = newHandler(s.ctx, s.cluster, &node)
+}
+
+func (s *HandlerSuite) TearDownTest(c *check.C) {
+	s.cancel()
 }
 
 func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
diff --git a/lib/controller/server_test.go b/lib/controller/server_test.go
index 95f17e79e..ae89c3d7e 100644
--- a/lib/controller/server_test.go
+++ b/lib/controller/server_test.go
@@ -5,28 +5,16 @@
 package controller
 
 import (
-	"bytes"
 	"net/http"
 	"os"
 	"path/filepath"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
-	"github.com/sirupsen/logrus"
 	check "gopkg.in/check.v1"
 )
 
-// logWriter is an io.Writer that writes by calling a "write log"
-// function, typically (*check.C)Log().
-type logWriter struct {
-	logfunc func(...interface{})
-}
-
-func (tl *logWriter) Write(buf []byte) (int, error) {
-	tl.logfunc(string(bytes.TrimRight(buf, "\n")))
-	return len(buf), nil
-}
-
 func integrationTestCluster() *arvados.Cluster {
 	cfg, err := arvados.GetConfig(filepath.Join(os.Getenv("WORKSPACE"), "tmp", "arvados.yml"))
 	if err != nil {
@@ -42,9 +30,7 @@ func integrationTestCluster() *arvados.Cluster {
 // Return a new unstarted controller server, using the Rails API
 // provided by the integration-testing environment.
 func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server {
-	log := logrus.New()
-	log.Formatter = &logrus.JSONFormatter{}
-	log.Out = &logWriter{c.Log}
+	log := ctxlog.TestLogger(c)
 
 	nodeProfile := arvados.NodeProfile{
 		Controller: arvados.SystemServiceInstance{Listen: ":"},
diff --git a/lib/dispatchcloud/cmd.go b/lib/dispatchcloud/cmd.go
index 92948fb30..7231e8394 100644
--- a/lib/dispatchcloud/cmd.go
+++ b/lib/dispatchcloud/cmd.go
@@ -5,6 +5,8 @@
 package dispatchcloud
 
 import (
+	"context"
+
 	"git.curoverse.com/arvados.git/lib/cmd"
 	"git.curoverse.com/arvados.git/lib/service"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -12,8 +14,8 @@ import (
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
 
-func newHandler(cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
-	d := &dispatcher{Cluster: cluster}
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
+	d := &dispatcher{Cluster: cluster, Context: ctx}
 	go d.Start()
 	return d
 }
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index 814dba917..5be9283a8 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -5,6 +5,7 @@
 package dispatchcloud
 
 import (
+	"context"
 	"crypto/md5"
 	"encoding/json"
 	"fmt"
@@ -20,6 +21,7 @@ import (
 	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 	"github.com/julienschmidt/httprouter"
 	"github.com/prometheus/client_golang/prometheus"
@@ -42,6 +44,7 @@ type pool interface {
 
 type dispatcher struct {
 	Cluster       *arvados.Cluster
+	Context       context.Context
 	InstanceSetID cloud.InstanceSetID
 
 	logger      logrus.FieldLogger
@@ -116,7 +119,7 @@ func (disp *dispatcher) initialize() {
 	}
 	disp.stop = make(chan struct{}, 1)
 	disp.stopped = make(chan struct{})
-	disp.logger = logrus.StandardLogger()
+	disp.logger = ctxlog.FromContext(disp.Context)
 
 	if key, err := ssh.ParsePrivateKey([]byte(disp.Cluster.Dispatch.PrivateKey)); err != nil {
 		disp.logger.Fatalf("error parsing configured Dispatch.PrivateKey: %s", err)
@@ -166,7 +169,7 @@ func (disp *dispatcher) run() {
 	if pollInterval <= 0 {
 		pollInterval = defaultPollInterval
 	}
-	sched := scheduler.New(disp.logger, disp.queue, disp.pool, staleLockTimeout, pollInterval)
+	sched := scheduler.New(disp.Context, disp.queue, disp.pool, staleLockTimeout, pollInterval)
 	sched.Start()
 	defer sched.Stop()
 
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 0558d79f1..36b060207 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -5,6 +5,7 @@
 package dispatchcloud
 
 import (
+	"context"
 	"encoding/json"
 	"io/ioutil"
 	"math/rand"
@@ -16,6 +17,7 @@ import (
 
 	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"golang.org/x/crypto/ssh"
 	check "gopkg.in/check.v1"
 )
@@ -23,12 +25,16 @@ import (
 var _ = check.Suite(&DispatcherSuite{})
 
 type DispatcherSuite struct {
+	ctx        context.Context
+	cancel     context.CancelFunc
 	cluster    *arvados.Cluster
 	stubDriver *test.StubDriver
 	disp       *dispatcher
 }
 
 func (s *DispatcherSuite) SetUpTest(c *check.C) {
+	s.ctx, s.cancel = context.WithCancel(context.Background())
+	s.ctx = ctxlog.Context(s.ctx, ctxlog.TestLogger(c))
 	dispatchpub, _ := test.LoadTestKey(c, "test/sshkey_dispatch")
 	dispatchprivraw, err := ioutil.ReadFile("test/sshkey_dispatch")
 	c.Assert(err, check.IsNil)
@@ -73,13 +79,17 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
 			},
 		},
 	}
-	s.disp = &dispatcher{Cluster: s.cluster}
+	s.disp = &dispatcher{
+		Cluster: s.cluster,
+		Context: s.ctx,
+	}
 	// Test cases can modify s.cluster before calling
 	// initialize(), and then modify private state before calling
 	// go run().
 }
 
 func (s *DispatcherSuite) TearDownTest(c *check.C) {
+	s.cancel()
 	s.disp.Close()
 }
 
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
index 7dd6866c0..4296a1364 100644
--- a/lib/dispatchcloud/scheduler/run_queue_test.go
+++ b/lib/dispatchcloud/scheduler/run_queue_test.go
@@ -5,12 +5,14 @@
 package scheduler
 
 import (
+	"context"
 	"sync"
 	"time"
 
 	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
 	"git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	check "gopkg.in/check.v1"
 )
 
@@ -120,6 +122,7 @@ type SchedulerSuite struct{}
 // immediately. Don't try to create any other nodes after the failed
 // create.
 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	queue := test.Queue{
 		ChooseType: chooseType,
 		Containers: []arvados.Container{
@@ -174,7 +177,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 		running:   map[string]time.Time{},
 		canCreate: 0,
 	}
-	New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+	New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
 	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
 	c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
 	c.Check(pool.running, check.HasLen, 1)
@@ -186,6 +189,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 // If Create() fails, shutdown some nodes, and don't call Create()
 // again.  Don't call Create() at all if AtQuota() is true.
 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	for quota := 0; quota < 2; quota++ {
 		c.Logf("quota=%d", quota)
 		shouldCreate := []arvados.InstanceType{}
@@ -229,7 +233,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 			starts:    []string{},
 			canCreate: 0,
 		}
-		New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+		New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
 		c.Check(pool.creates, check.DeepEquals, shouldCreate)
 		c.Check(pool.starts, check.DeepEquals, []string{})
 		c.Check(pool.shutdowns, check.Not(check.Equals), 0)
@@ -239,6 +243,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 // Start lower-priority containers while waiting for new/existing
 // workers to come up for higher-priority containers.
 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
+	ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
 	pool := stubPool{
 		unalloc: map[arvados.InstanceType]int{
 			test.InstanceType(1): 2,
@@ -317,7 +322,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
 		},
 	}
 	queue.Update()
-	New(test.Logger(), &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+	New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
 	c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
 	c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
 	running := map[string]bool{}
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
index 070b2de00..97cacee08 100644
--- a/lib/dispatchcloud/scheduler/scheduler.go
+++ b/lib/dispatchcloud/scheduler/scheduler.go
@@ -7,9 +7,11 @@
 package scheduler
 
 import (
+	"context"
 	"sync"
 	"time"
 
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"github.com/sirupsen/logrus"
 )
 
@@ -44,9 +46,9 @@ type Scheduler struct {
 //
 // Any given queue and pool should not be used by more than one
 // scheduler at a time.
-func New(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
+func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
 	return &Scheduler{
-		logger:              logger,
+		logger:              ctxlog.FromContext(ctx),
 		queue:               queue,
 		pool:                pool,
 		staleLockTimeout:    staleLockTimeout,
diff --git a/lib/dispatchcloud/test/logger.go b/lib/dispatchcloud/test/logger.go
deleted file mode 100644
index a59eeb689..000000000
--- a/lib/dispatchcloud/test/logger.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package test
-
-import (
-	"os"
-
-	"github.com/sirupsen/logrus"
-)
-
-func Logger() logrus.FieldLogger {
-	logger := logrus.StandardLogger()
-	if os.Getenv("ARVADOS_DEBUG") != "" {
-		logger.SetLevel(logrus.DebugLevel)
-	}
-	return logger
-}
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
index c0d2e61fc..5873e4922 100644
--- a/lib/dispatchcloud/test/stub_driver.go
+++ b/lib/dispatchcloud/test/stub_driver.go
@@ -62,6 +62,7 @@ func (sd *StubDriver) InstanceSet(params json.RawMessage, id cloud.InstanceSetID
 	}
 	sis := StubInstanceSet{
 		driver:  sd,
+		logger:  logger,
 		servers: map[cloud.InstanceID]*StubVM{},
 	}
 	sd.instanceSets = append(sd.instanceSets, &sis)
@@ -91,6 +92,7 @@ func (sd *StubDriver) ReleaseCloudOps(n int) {
 
 type StubInstanceSet struct {
 	driver  *StubDriver
+	logger  logrus.FieldLogger
 	servers map[cloud.InstanceID]*StubVM
 	mtx     sync.RWMutex
 	stopped bool
@@ -249,7 +251,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
 		svm.Unlock()
 		time.Sleep(svm.CrunchRunDetachDelay)
 		fmt.Fprintf(stderr, "starting %s\n", uuid)
-		logger := logrus.WithFields(logrus.Fields{
+		logger := svm.sis.logger.WithFields(logrus.Fields{
 			"Instance":      svm.id,
 			"ContainerUUID": uuid,
 		})
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
index 526bc552c..ffb7ab391 100644
--- a/lib/dispatchcloud/worker/pool_test.go
+++ b/lib/dispatchcloud/worker/pool_test.go
@@ -62,7 +62,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 		}
 	}
 
-	logger := test.Logger()
+	logger := ctxlog.TestLogger(c)
 	driver := &test.StubDriver{}
 	is, err := driver.InstanceSet(nil, "", logger)
 	c.Assert(err, check.IsNil)
@@ -124,7 +124,7 @@ func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
 }
 
 func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
-	logger := test.Logger()
+	logger := ctxlog.TestLogger(c)
 	driver := test.StubDriver{HoldCloudOps: true}
 	instanceSet, err := driver.InstanceSet(nil, "", logger)
 	c.Assert(err, check.IsNil)
diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go
index c5b09359f..65e093530 100644
--- a/lib/dispatchcloud/worker/worker_test.go
+++ b/lib/dispatchcloud/worker/worker_test.go
@@ -20,7 +20,7 @@ var _ = check.Suite(&WorkerSuite{})
 type WorkerSuite struct{}
 
 func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
-	logger := test.Logger()
+	logger := ctxlog.TestLogger(c)
 	bootTimeout := time.Minute
 	probeTimeout := time.Second
 
diff --git a/lib/service/cmd.go b/lib/service/cmd.go
index 7f6b0236c..d99af0eea 100644
--- a/lib/service/cmd.go
+++ b/lib/service/cmd.go
@@ -6,6 +6,7 @@
 package service
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"io"
@@ -14,6 +15,7 @@ import (
 
 	"git.curoverse.com/arvados.git/lib/cmd"
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 	"github.com/coreos/go-systemd/daemon"
 	"github.com/sirupsen/logrus"
@@ -24,7 +26,7 @@ type Handler interface {
 	CheckHealth() error
 }
 
-type NewHandlerFunc func(*arvados.Cluster, *arvados.NodeProfile) Handler
+type NewHandlerFunc func(context.Context, *arvados.Cluster, *arvados.NodeProfile) Handler
 
 type command struct {
 	newHandler NewHandlerFunc
@@ -45,11 +47,7 @@ func Command(svcName arvados.ServiceName, newHandler NewHandlerFunc) cmd.Handler
 }
 
 func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
-	log := logrus.New()
-	log.Formatter = &logrus.JSONFormatter{
-		TimestampFormat: rfc3339NanoFixed,
-	}
-	log.Out = stderr
+	log := ctxlog.New(stderr, "json", "info")
 
 	var err error
 	defer func() {
@@ -76,6 +74,10 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 	if err != nil {
 		return 1
 	}
+	log = ctxlog.New(stderr, cluster.Logging.Format, cluster.Logging.Level).WithFields(logrus.Fields{
+		"PID": os.Getpid(),
+	})
+	ctx := ctxlog.Context(context.Background(), log)
 	profileName := *nodeProfile
 	if profileName == "" {
 		profileName = os.Getenv("ARVADOS_NODE_PROFILE")
@@ -89,7 +91,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
 		err = fmt.Errorf("configuration does not enable the %s service on this host", c.svcName)
 		return 1
 	}
-	handler := c.newHandler(cluster, profile)
+	handler := c.newHandler(ctx, cluster, profile)
 	if err = handler.CheckHealth(); err != nil {
 		return 1
 	}
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 9586d3d8a..c2154d0f2 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -66,6 +66,12 @@ type Cluster struct {
 	RemoteClusters     map[string]RemoteCluster
 	PostgreSQL         PostgreSQL
 	RequestLimits      RequestLimits
+	Logging            Logging
+}
+
+type Logging struct {
+	Level  string
+	Format string
 }
 
 type PostgreSQL struct {
diff --git a/sdk/go/ctxlog/log.go b/sdk/go/ctxlog/log.go
index 45e4efdbe..e66eeadee 100644
--- a/sdk/go/ctxlog/log.go
+++ b/sdk/go/ctxlog/log.go
@@ -5,9 +5,13 @@
 package ctxlog
 
 import (
+	"bytes"
 	"context"
+	"io"
+	"os"
 
 	"github.com/sirupsen/logrus"
+	check "gopkg.in/check.v1"
 )
 
 var (
@@ -19,45 +23,87 @@ const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
 // Context returns a new child context such that FromContext(child)
 // returns the given logger.
-func Context(ctx context.Context, logger *logrus.Entry) context.Context {
+func Context(ctx context.Context, logger logrus.FieldLogger) context.Context {
 	return context.WithValue(ctx, loggerCtxKey, logger)
 }
 
 // FromContext returns the logger suitable for the given context -- the one
 // attached by contextWithLogger() if applicable, otherwise the
 // top-level logger with no fields/values.
-func FromContext(ctx context.Context) *logrus.Entry {
+func FromContext(ctx context.Context) logrus.FieldLogger {
 	if ctx != nil {
-		if logger, ok := ctx.Value(loggerCtxKey).(*logrus.Entry); ok {
+		if logger, ok := ctx.Value(loggerCtxKey).(logrus.FieldLogger); ok {
 			return logger
 		}
 	}
 	return rootLogger.WithFields(nil)
 }
 
+// New returns a new logger with the indicated format and
+// level.
+func New(out io.Writer, format, level string) logrus.FieldLogger {
+	logger := logrus.New()
+	logger.Out = out
+	setFormat(logger, format)
+	setLevel(logger, level)
+	return logger
+}
+
+func TestLogger(c *check.C) logrus.FieldLogger {
+	logger := logrus.New()
+	logger.Out = &logWriter{c.Log}
+	setFormat(logger, "text")
+	if d := os.Getenv("ARVADOS_DEBUG"); d != "0" && d != "" {
+		setLevel(logger, "debug")
+	} else {
+		setLevel(logger, "info")
+	}
+	return logger
+}
+
 // SetLevel sets the current logging level. See logrus for level
 // names.
 func SetLevel(level string) {
-	lvl, err := logrus.ParseLevel(level)
-	if err != nil {
-		logrus.Fatal(err)
+	setLevel(rootLogger, level)
+}
+
+func setLevel(logger *logrus.Logger, level string) {
+	if level == "" {
+	} else if lvl, err := logrus.ParseLevel(level); err != nil {
+		logrus.WithField("Level", level).Fatal("unknown log level")
+	} else {
+		logger.Level = lvl
 	}
-	rootLogger.Level = lvl
 }
 
 // SetFormat sets the current logging format to "json" or "text".
 func SetFormat(format string) {
+	setFormat(rootLogger, format)
+}
+
+func setFormat(logger *logrus.Logger, format string) {
 	switch format {
 	case "text":
-		rootLogger.Formatter = &logrus.TextFormatter{
+		logger.Formatter = &logrus.TextFormatter{
 			FullTimestamp:   true,
 			TimestampFormat: rfc3339NanoFixed,
 		}
-	case "json":
-		rootLogger.Formatter = &logrus.JSONFormatter{
+	case "json", "":
+		logger.Formatter = &logrus.JSONFormatter{
 			TimestampFormat: rfc3339NanoFixed,
 		}
 	default:
-		logrus.WithField("LogFormat", format).Fatal("unknown log format")
+		logrus.WithField("Format", format).Fatal("unknown log format")
 	}
 }
+
+// logWriter is an io.Writer that writes by calling a "write log"
+// function, typically (*check.C)Log().
+type logWriter struct {
+	logfunc func(...interface{})
+}
+
+func (tl *logWriter) Write(buf []byte) (int, error) {
+	tl.logfunc(string(bytes.TrimRight(buf, "\n")))
+	return len(buf), nil
+}

commit 80c48b78fe5029fe3389275b9e51fe0be65cb11d
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Sat Feb 16 22:25:44 2019 -0500

    14807: Log when a container is added/removed from the queue.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index 297782c35..bbe47625a 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -131,7 +131,7 @@ func (cq *Queue) Forget(uuid string) {
 	defer cq.mtx.Unlock()
 	ctr := cq.current[uuid].Container
 	if ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled {
-		delete(cq.current, uuid)
+		cq.delEnt(uuid, ctr.State)
 	}
 }
 
@@ -196,7 +196,7 @@ func (cq *Queue) Update() error {
 			cq.current[uuid] = cur
 		}
 	}
-	for uuid := range cq.current {
+	for uuid, ent := range cq.current {
 		if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
 			// Don't expunge an entry that was
 			// added/updated locally after we started
@@ -207,7 +207,7 @@ func (cq *Queue) Update() error {
 			// the poll response (evidently it's
 			// cancelled, completed, deleted, or taken by
 			// a different dispatcher).
-			delete(cq.current, uuid)
+			cq.delEnt(uuid, ent.Container.State)
 		}
 	}
 	cq.dontupdate = nil
@@ -216,6 +216,15 @@ func (cq *Queue) Update() error {
 	return nil
 }
 
+// Caller must have lock.
+func (cq *Queue) delEnt(uuid string, state arvados.ContainerState) {
+	cq.logger.WithFields(logrus.Fields{
+		"ContainerUUID": uuid,
+		"State":         state,
+	}).Info("dropping container from queue")
+	delete(cq.current, uuid)
+}
+
 func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
 	it, err := cq.chooseType(&ctr)
 	if err != nil && (ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked) {
@@ -269,6 +278,12 @@ func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
 		}()
 		return
 	}
+	cq.logger.WithFields(logrus.Fields{
+		"ContainerUUID": ctr.UUID,
+		"State":         ctr.State,
+		"Priority":      ctr.Priority,
+		"InstanceType":  it.Name,
+	}).Info("adding container to queue")
 	cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
 }
 

commit abd21f165dbfd84eaea9ca1d7f20ce195c52d679
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Sat Feb 16 22:17:26 2019 -0500

    14807: Split instance count/size/cost metrics by idle/hold status.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index ce66625a2..14f6a3efc 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -153,13 +153,11 @@ type Pool struct {
 	throttleCreate    throttle
 	throttleInstances throttle
 
-	mInstances         prometheus.Gauge
-	mInstancesPrice    prometheus.Gauge
 	mContainersRunning prometheus.Gauge
-	mVCPUs             prometheus.Gauge
-	mVCPUsInuse        prometheus.Gauge
-	mMemory            prometheus.Gauge
-	mMemoryInuse       prometheus.Gauge
+	mInstances         *prometheus.GaugeVec
+	mInstancesPrice    *prometheus.GaugeVec
+	mVCPUs             *prometheus.GaugeVec
+	mMemory            *prometheus.GaugeVec
 }
 
 // Subscribe returns a buffered channel that becomes ready after any
@@ -527,20 +525,6 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 	if reg == nil {
 		reg = prometheus.NewRegistry()
 	}
-	wp.mInstances = prometheus.NewGauge(prometheus.GaugeOpts{
-		Namespace: "arvados",
-		Subsystem: "dispatchcloud",
-		Name:      "instances_total",
-		Help:      "Number of cloud VMs including pending, booting, running, held, and shutting down.",
-	})
-	reg.MustRegister(wp.mInstances)
-	wp.mInstancesPrice = prometheus.NewGauge(prometheus.GaugeOpts{
-		Namespace: "arvados",
-		Subsystem: "dispatchcloud",
-		Name:      "instances_price_total",
-		Help:      "Sum of prices of all cloud VMs including pending, booting, running, held, and shutting down.",
-	})
-	reg.MustRegister(wp.mInstancesPrice)
 	wp.mContainersRunning = prometheus.NewGauge(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
@@ -548,35 +532,34 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 		Help:      "Number of containers reported running by cloud VMs.",
 	})
 	reg.MustRegister(wp.mContainersRunning)
-
-	wp.mVCPUs = prometheus.NewGauge(prometheus.GaugeOpts{
+	wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "instances_total",
+		Help:      "Number of cloud VMs.",
+	}, []string{"category"})
+	reg.MustRegister(wp.mInstances)
+	wp.mInstancesPrice = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "arvados",
+		Subsystem: "dispatchcloud",
+		Name:      "instances_price",
+		Help:      "Price of cloud VMs.",
+	}, []string{"category"})
+	reg.MustRegister(wp.mInstancesPrice)
+	wp.mVCPUs = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
 		Name:      "vcpus_total",
 		Help:      "Total VCPUs on all cloud VMs.",
-	})
+	}, []string{"category"})
 	reg.MustRegister(wp.mVCPUs)
-	wp.mVCPUsInuse = prometheus.NewGauge(prometheus.GaugeOpts{
-		Namespace: "arvados",
-		Subsystem: "dispatchcloud",
-		Name:      "vcpus_inuse",
-		Help:      "VCPUs on cloud VMs that are running containers.",
-	})
-	reg.MustRegister(wp.mVCPUsInuse)
-	wp.mMemory = prometheus.NewGauge(prometheus.GaugeOpts{
+	wp.mMemory = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "arvados",
 		Subsystem: "dispatchcloud",
 		Name:      "memory_bytes_total",
 		Help:      "Total memory on all cloud VMs.",
-	})
+	}, []string{"category"})
 	reg.MustRegister(wp.mMemory)
-	wp.mMemoryInuse = prometheus.NewGauge(prometheus.GaugeOpts{
-		Namespace: "arvados",
-		Subsystem: "dispatchcloud",
-		Name:      "memory_bytes_inuse",
-		Help:      "Memory on cloud VMs that are running containers.",
-	})
-	reg.MustRegister(wp.mMemoryInuse)
 }
 
 func (wp *Pool) runMetrics() {
@@ -591,26 +574,38 @@ func (wp *Pool) updateMetrics() {
 	wp.mtx.RLock()
 	defer wp.mtx.RUnlock()
 
-	var price float64
-	var alloc, cpu, cpuInuse, mem, memInuse int64
+	instances := map[string]int64{}
+	price := map[string]float64{}
+	cpu := map[string]int64{}
+	mem := map[string]int64{}
+	var running int64
 	for _, wkr := range wp.workers {
-		price += wkr.instType.Price
-		cpu += int64(wkr.instType.VCPUs)
-		mem += int64(wkr.instType.RAM)
-		if len(wkr.running)+len(wkr.starting) == 0 {
-			continue
+		var cat string
+		switch {
+		case len(wkr.running)+len(wkr.starting) > 0:
+			cat = "inuse"
+		case wkr.idleBehavior == IdleBehaviorHold:
+			cat = "hold"
+		case wkr.state == StateBooting:
+			cat = "booting"
+		case wkr.state == StateUnknown:
+			cat = "unknown"
+		default:
+			cat = "idle"
 		}
-		alloc += int64(len(wkr.running) + len(wkr.starting))
-		cpuInuse += int64(wkr.instType.VCPUs)
-		memInuse += int64(wkr.instType.RAM)
-	}
-	wp.mInstances.Set(float64(len(wp.workers)))
-	wp.mInstancesPrice.Set(price)
-	wp.mContainersRunning.Set(float64(alloc))
-	wp.mVCPUs.Set(float64(cpu))
-	wp.mMemory.Set(float64(mem))
-	wp.mVCPUsInuse.Set(float64(cpuInuse))
-	wp.mMemoryInuse.Set(float64(memInuse))
+		instances[cat]++
+		price[cat] += wkr.instType.Price
+		cpu[cat] += int64(wkr.instType.VCPUs)
+		mem[cat] += int64(wkr.instType.RAM)
+		running += int64(len(wkr.running) + len(wkr.starting))
+	}
+	for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
+		wp.mInstances.WithLabelValues(cat).Set(float64(instances[cat]))
+		wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
+		wp.mVCPUs.WithLabelValues(cat).Set(float64(cpu[cat]))
+		wp.mMemory.WithLabelValues(cat).Set(float64(mem[cat]))
+	}
+	wp.mContainersRunning.Set(float64(running))
 }
 
 func (wp *Pool) runProbes() {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list