[ARVADOS] updated: 1.3.0-124-gee1161113

Git user git at public.curoverse.com
Thu Dec 20 09:58:19 EST 2018


Summary of changes:
 lib/dispatchcloud/dispatcher.go          |  2 ++
 lib/dispatchcloud/dispatcher_test.go     |  1 +
 lib/dispatchcloud/scheduler/run_queue.go | 17 +++++++++--------
 lib/dispatchcloud/worker/pool.go         |  1 +
 4 files changed, 13 insertions(+), 8 deletions(-)

       via  ee1161113168d709f0cb0513f192631c1f35b3ab (commit)
       via  7b45cf3217fbee46cdd706893ec948bfedaae24c (commit)
       via  e787539455a4a0bf1e4f299460258905a56352d4 (commit)
       via  f8da36c66e4109730f7ad6f1a29f4e8cb921198c (commit)
      from  b3dbac2705f2bcffbf346d0b7efb85e2566b11f8 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit ee1161113168d709f0cb0513f192631c1f35b3ab
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Dec 20 09:44:45 2018 -0500

    14360: Fix instance creation limiting.
    
    Scheduler was claiming one of its unalloc instances only if the
    container could be scheduled, so the "need new instance" condition was
    not triggered for lower-priority containers needing the same instance
    type.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index dab1b6068..ece8e3d98 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -54,12 +54,13 @@ tryrun:
 			sch.bgLock(logger, ctr.UUID)
 			unalloc[it]--
 		case arvados.ContainerStateLocked:
-			if unalloc[it] < 1 {
-				if sch.pool.AtQuota() {
-					logger.Debug("not starting: AtQuota and no unalloc workers")
-					overquota = sorted[i:]
-					break tryrun
-				}
+			if unalloc[it] > 0 {
+				unalloc[it]--
+			} else if sch.pool.AtQuota() {
+				logger.Debug("not starting: AtQuota and no unalloc workers")
+				overquota = sorted[i:]
+				break tryrun
+			} else {
 				logger.Info("creating new instance")
 				err := sch.pool.Create(it)
 				if err != nil {
@@ -78,7 +79,6 @@ tryrun:
 					overquota = sorted[i:]
 					break tryrun
 				}
-				unalloc[it]++
 			}
 
 			if dontstart[it] {
@@ -87,7 +87,7 @@ tryrun:
 				// same instance type. Don't let this
 				// one sneak in ahead of it.
 			} else if sch.pool.StartContainer(it, ctr) {
-				unalloc[it]--
+				// Success.
 			} else {
 				dontstart[it] = true
 			}

commit 7b45cf3217fbee46cdd706893ec948bfedaae24c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Dec 20 09:42:49 2018 -0500

    14360: Fix panic in test when last container is run twice.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
index 737688023..33823a828 100644
--- a/lib/dispatchcloud/dispatcher_test.go
+++ b/lib/dispatchcloud/dispatcher_test.go
@@ -126,6 +126,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
 		defer mtx.Unlock()
 		if _, ok := waiting[ctr.UUID]; !ok {
 			c.Logf("container completed twice: %s -- perhaps completed after stub instance was killed?", ctr.UUID)
+			return 1
 		}
 		delete(waiting, ctr.UUID)
 		if len(waiting) == 0 {

commit e787539455a4a0bf1e4f299460258905a56352d4
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Dec 20 09:27:37 2018 -0500

    14360: Debug log when acquiring lock.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index c8c25131e..dab1b6068 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -154,6 +154,7 @@ func (sch *Scheduler) bgLock(logger logrus.FieldLogger, uuid string) {
 			logger.WithError(err).Warn("error locking container")
 			return
 		}
+		logger.Debug("lock succeeded")
 		ctr, ok := sch.queue.Get(uuid)
 		if !ok {
 			logger.Error("(BUG?) container disappeared from queue after Lock succeeded")

commit f8da36c66e4109730f7ad6f1a29f4e8cb921198c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Dec 20 09:25:56 2018 -0500

    14360: Shutdown pool between tests to eliminate leaking logs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
index bea6ed3cc..81ad0ed3f 100644
--- a/lib/dispatchcloud/dispatcher.go
+++ b/lib/dispatchcloud/dispatcher.go
@@ -35,6 +35,7 @@ const (
 type pool interface {
 	scheduler.WorkerPool
 	Instances() []worker.InstanceView
+	Stop()
 }
 
 type dispatcher struct {
@@ -149,6 +150,7 @@ func (disp *dispatcher) initialize() {
 func (disp *dispatcher) run() {
 	defer close(disp.stopped)
 	defer disp.instanceSet.Stop()
+	defer disp.pool.Stop()
 
 	staleLockTimeout := time.Duration(disp.Cluster.Dispatch.StaleLockTimeout)
 	if staleLockTimeout == 0 {
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index 722d4e918..ff5f762c1 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -90,6 +90,7 @@ func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cl
 		timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
 		timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
 		timeoutShutdown:    duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+		stop:               make(chan bool),
 	}
 	wp.registerMetrics(reg)
 	go func() {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list