[ARVADOS] updated: 1.3.0-238-g71fd4da18

Git user git at public.curoverse.com
Fri Feb 1 16:46:57 EST 2019


Summary of changes:
 .../app/assets/javascripts/components/edit_tags.js | 54 ++++++++---------
 .../app/controllers/work_units_controller.rb       |  2 +-
 apps/workbench/app/models/collection.rb            |  4 +-
 build/run-tests.sh                                 |  2 +-
 lib/controller/handler_test.go                     |  2 +-
 lib/dispatchcloud/container/queue.go               | 20 +++++--
 lib/dispatchcloud/container/queue_test.go          | 14 ++++-
 lib/dispatchcloud/worker/pool.go                   |  9 ++-
 lib/dispatchcloud/worker/worker.go                 | 44 +++++++++++---
 sdk/python/arvados/_normalize_stream.py            | 11 +++-
 sdk/python/arvados/collection.py                   | 17 +++++-
 sdk/python/tests/test_collections.py               | 41 ++++++++++++-
 sdk/ruby/lib/arvados/keep.rb                       | 45 ++++++++++-----
 sdk/ruby/test/test_keep_manifest.rb                | 67 +++++++++++++++++++++-
 .../app/controllers/user_sessions_controller.rb    | 43 ++++++++++----
 .../api/app/models/api_client_authorization.rb     |  7 +++
 .../functional/user_sessions_controller_test.rb    | 18 ++++++
 .../api/test/integration/user_sessions_test.rb     | 34 ++++++++---
 services/fuse/tests/test_mount.py                  |  9 +--
 19 files changed, 351 insertions(+), 92 deletions(-)

       via  71fd4da18b22100682ae7e2079aadfd66360d310 (commit)
       via  a641be48fcab06e2efd701fddb1b276c159f26cc (commit)
       via  affd909987cedc1d2418ff4d9cb031f01ffe4deb (commit)
       via  138ea2112594804cbb2a775c13186208d3685dfd (commit)
       via  fced3381bd3e4cfba379a50107f5dc27dda93aea (commit)
       via  c5a89fc450c90216ac2100e537189638a35bdc02 (commit)
       via  6fbd4412cfbef5b55c8aa75ac129f3eda55d4aa5 (commit)
       via  e17cd76f84c85ea408bf727165b713ca8de2ace5 (commit)
       via  163c8f8750193b791eb62f5a8d73dc44a006b69e (commit)
       via  1c05663663b1002dee41cebc7d766fd43345033e (commit)
       via  d1e00d89dac87929d39c0689a593f0574980f2e8 (commit)
       via  245e4cafdb8146307c1df7e36a46604a0fff4c5d (commit)
       via  eae02bfd9730fa968865ab1cce65666e2653957b (commit)
       via  0d1836a8d4d5a0c0802881c2878a35f611e09e1f (commit)
       via  4bb449eb541e7bc22dfb09c31451d2258f189495 (commit)
       via  d162b10b084348a56eac784f6600ece297a65803 (commit)
       via  7337b18bf7b6996a7fe4df0aba5356a03bda452d (commit)
       via  9363a29fadbb1af352652e639f2c4cdfd1336d4d (commit)
       via  75487f9f9c705186e41221ff0c52474edbf5a9d0 (commit)
       via  4620478d694697eff07e501187d784c6c98ccfa9 (commit)
       via  c5e7cbef5659a7e5e6560d5ff7cfebde7dd97527 (commit)
       via  6bbdc069109b1a51e39456b78c6727b63a9c0e1d (commit)
       via  a974fc22eeef0c417eb9f41bda556baa89a5ea68 (commit)
       via  8b2eb7d1fa47e4a691849a485573c5da79bb1e2e (commit)
       via  10a159a92c6b3163df50dbb80fa5a1570d0e2d23 (commit)
       via  40104312e89869bd008c156a2897caf1667e9c10 (commit)
       via  c4b9e5167a6196ad26121816fa82f400a2d7a9d2 (commit)
       via  735143cbf278a87d975f71152471d59795836460 (commit)
       via  29b8fe537aeb93c7974c8d8ec5f8f7cd0e67bdb3 (commit)
       via  949d940b5f75114f86d381c347f0ba8f26b3e9b9 (commit)
       via  61248e8459dd3c99a62056739609f75231f9e385 (commit)
       via  7b86c93c104581fbf7a1fc9bfeb1ae300b9ee29e (commit)
       via  58f1b2d26ba08e28307f45731be157ad61feb8ce (commit)
       via  73e01bb3eaef21828ec2ae802440077752b5b7e9 (commit)
       via  13e7ad8135a0bafc3d1d225ff7e4c62de4f3b43f (commit)
       via  bf5c9ddab89fb7392950c9f6edd83e5e497969f2 (commit)
       via  3b0327f9a02dcde5bac5ec37433ff6fb1c42dc51 (commit)
       via  7d804c0b62975b4059dea757dbc2fbd0320c1497 (commit)
      from  a27b2bf3e33a80213a42dcf1e01144209eb2603a (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 71fd4da18b22100682ae7e2079aadfd66360d310
Merge: a641be48f c5a89fc45
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Feb 1 16:29:17 2019 -0500

    14325: Merge branch 'master'
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>


commit a641be48fcab06e2efd701fddb1b276c159f26cc
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Feb 1 16:26:46 2019 -0500

    14325: Rephrase confusing conditions and add comments.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
index b2d601d46..e6b506298 100644
--- a/lib/dispatchcloud/worker/pool.go
+++ b/lib/dispatchcloud/worker/pool.go
@@ -204,7 +204,14 @@ func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
 		creating[it] = len(times)
 	}
 	for _, wkr := range wp.workers {
-		if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) || wkr.idleBehavior != IdleBehaviorRun || len(wkr.running) > 0 {
+		// Skip workers that are not expected to become
+		// available soon. Note len(wkr.running)>0 is not
+		// redundant here: it can be true even in
+		// StateUnknown.
+		if wkr.state == StateShutdown ||
+			wkr.state == StateRunning ||
+			wkr.idleBehavior != IdleBehaviorRun ||
+			len(wkr.running) > 0 {
 			continue
 		}
 		it := wkr.instType
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index baa56adde..78ebaac6e 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -247,11 +247,21 @@ func (wkr *worker) probeAndUpdate() {
 		// advantage of the non-busy state, though.
 		wkr.busy = updateTime
 	}
-	running := map[string]struct{}{}
 	changed := false
+
+	// Build a new "running" map. Set changed=true if it differs
+	// from the existing map (wkr.running) to ensure the scheduler
+	// gets notified below.
+	running := map[string]struct{}{}
 	for _, uuid := range ctrUUIDs {
 		running[uuid] = struct{}{}
 		if _, ok := wkr.running[uuid]; !ok {
+			if _, ok := wkr.starting[uuid]; !ok {
+				// We didn't start it -- it must have
+				// been started by a previous
+				// dispatcher process.
+				logger.WithField("ContainerUUID", uuid).Info("crunch-run process detected")
+			}
 			changed = true
 		}
 	}
@@ -262,20 +272,30 @@ func (wkr *worker) probeAndUpdate() {
 			changed = true
 		}
 	}
+
+	// Update state if this was the first successful boot-probe.
 	if booted && (wkr.state == StateUnknown || wkr.state == StateBooting) {
 		// Note: this will change again below if
 		// len(wkr.starting)+len(wkr.running) > 0.
 		wkr.state = StateIdle
 		changed = true
-	} else if wkr.state == StateUnknown && len(running) != len(wkr.running) {
+	}
+
+	// If wkr.state and wkr.running aren't changing then there's
+	// no need to log anything, notify the scheduler, move state
+	// back and forth between idle/running, etc.
+	if !changed {
+		return
+	}
+
+	// Log whenever a run-probe reveals crunch-run processes
+	// appearing/disappearing before boot-probe succeeds.
+	if wkr.state == StateUnknown && len(running) != len(wkr.running) {
 		logger.WithFields(logrus.Fields{
 			"RunningContainers": len(running),
 			"State":             wkr.state,
 		}).Info("crunch-run probe succeeded, but boot probe is still failing")
 	}
-	if !changed {
-		return
-	}
 
 	wkr.running = running
 	if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
@@ -333,6 +353,7 @@ func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
 // caller must have lock.
 func (wkr *worker) shutdownIfBroken(dur time.Duration) {
 	if wkr.idleBehavior == IdleBehaviorHold {
+		// Never shut down.
 		return
 	}
 	label, threshold := "", wkr.wp.timeoutProbe
@@ -353,16 +374,21 @@ func (wkr *worker) shutdownIfBroken(dur time.Duration) {
 // caller must have lock.
 func (wkr *worker) shutdownIfIdle() bool {
 	if wkr.idleBehavior == IdleBehaviorHold {
-		return false
-	}
-	if !(wkr.state == StateIdle || (wkr.state == StateBooting && wkr.idleBehavior == IdleBehaviorDrain)) {
+		// Never shut down.
 		return false
 	}
 	age := time.Since(wkr.busy)
-	if wkr.idleBehavior != IdleBehaviorDrain && age < wkr.wp.timeoutIdle {
+
+	old := age >= wkr.wp.timeoutIdle
+	draining := wkr.idleBehavior == IdleBehaviorDrain
+	shouldShutdown := ((old || draining) && wkr.state == StateIdle) ||
+		(draining && wkr.state == StateBooting)
+	if !shouldShutdown {
 		return false
 	}
+
 	wkr.logger.WithFields(logrus.Fields{
+		"State":        wkr.state,
 		"Age":          age,
 		"IdleBehavior": wkr.idleBehavior,
 	}).Info("shutdown idle worker")

commit affd909987cedc1d2418ff4d9cb031f01ffe4deb
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Feb 1 16:25:59 2019 -0500

    14325: Improve identifiers/comments.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index d49d0166a..847fe9e27 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -184,7 +184,9 @@ func (cq *Queue) Update() error {
 	cq.mtx.Lock()
 	defer cq.mtx.Unlock()
 	for uuid, ctr := range next {
-		if _, keep := cq.dontupdate[uuid]; keep {
+		if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+			// Don't clobber a local update that happened
+			// after we started polling.
 			continue
 		}
 		if cur, ok := cq.current[uuid]; !ok {
@@ -195,11 +197,16 @@ func (cq *Queue) Update() error {
 		}
 	}
 	for uuid := range cq.current {
-		if _, keep := cq.dontupdate[uuid]; keep {
+		if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+			// Don't expunge an entry that was
+			// added/updated locally after we started
+			// polling.
 			continue
-		} else if _, keep = next[uuid]; keep {
-			continue
-		} else {
+		} else if _, stillpresent := next[uuid]; !stillpresent {
+			// Expunge an entry that no longer appears in
+			// the poll response (evidently it's
+			// cancelled, completed, deleted, or taken by
+			// a different dispatcher).
 			delete(cq.current, uuid)
 		}
 	}

commit 138ea2112594804cbb2a775c13186208d3685dfd
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Feb 1 16:23:07 2019 -0500

    14325: Note assumption concurrent dispatchers share a VM size menu.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index 464e31186..d49d0166a 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -212,6 +212,9 @@ func (cq *Queue) Update() error {
 func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
 	it, err := cq.chooseType(&ctr)
 	if err != nil && (ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked) {
+		// We assume here that any chooseType error is a hard
+		// error: it wouldn't help to try again, or to leave
+		// it for a different dispatcher process to attempt.
 		errorString := err.Error()
 		cq.logger.WithField("ContainerUUID", ctr.UUID).Warn("cancel container with no suitable instance type")
 		go func() {

commit fced3381bd3e4cfba379a50107f5dc27dda93aea
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Fri Feb 1 16:22:41 2019 -0500

    14325: Fix test.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/lib/dispatchcloud/container/queue_test.go b/lib/dispatchcloud/container/queue_test.go
index 75326921f..91d65359e 100644
--- a/lib/dispatchcloud/container/queue_test.go
+++ b/lib/dispatchcloud/container/queue_test.go
@@ -6,12 +6,14 @@ package container
 
 import (
 	"errors"
+	"os"
 	"sync"
 	"testing"
 	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"github.com/sirupsen/logrus"
 	check "gopkg.in/check.v1"
 )
 
@@ -22,6 +24,14 @@ func Test(t *testing.T) {
 
 var _ = check.Suite(&IntegrationSuite{})
 
+func logger() logrus.FieldLogger {
+	logger := logrus.StandardLogger()
+	if os.Getenv("ARVADOS_DEBUG") != "" {
+		logger.SetLevel(logrus.DebugLevel)
+	}
+	return logger
+}
+
 type IntegrationSuite struct{}
 
 func (suite *IntegrationSuite) TearDownTest(c *check.C) {
@@ -35,7 +45,7 @@ func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
 	}
 
 	client := arvados.NewClientFromEnv()
-	cq := NewQueue(test.Logger(), nil, typeChooser, client)
+	cq := NewQueue(logger(), nil, typeChooser, client)
 
 	err := cq.Update()
 	c.Check(err, check.IsNil)
@@ -92,7 +102,7 @@ func (suite *IntegrationSuite) TestCancelIfNoInstanceType(c *check.C) {
 	}
 
 	client := arvados.NewClientFromEnv()
-	cq := NewQueue(test.Logger(), nil, errorTypeChooser, client)
+	cq := NewQueue(logger(), nil, errorTypeChooser, client)
 
 	var ctr arvados.Container
 	err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list