[ARVADOS] updated: 1.3.0-183-g8052381fb
Git user
git at public.curoverse.com
Fri Jan 18 16:33:34 EST 2019
Summary of changes:
lib/dispatchcloud/container/queue.go | 49 ++++++++++++++++++++++++++-----
lib/dispatchcloud/container/queue_test.go | 4 +--
sdk/go/arvados/container.go | 31 +++++++++----------
3 files changed, 59 insertions(+), 25 deletions(-)
discards 3bf30090d15962ea34b761ce8ca5b43a972ba7f1 (commit)
via 8052381fb4e7aceb52497e8378b596178cf5af7c (commit)
This update added new revisions after undoing existing revisions. That is
to say, the old revision is not a strict subset of the new revision. This
situation occurs when you --force push a change and generate a repository
containing something like this:
* -- * -- B -- O -- O -- O (3bf30090d15962ea34b761ce8ca5b43a972ba7f1)
\
N -- N -- N (8052381fb4e7aceb52497e8378b596178cf5af7c)
When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 8052381fb4e7aceb52497e8378b596178cf5af7c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Fri Jan 18 16:28:45 2019 -0500
14325: Cancel containers with unsatisfiable runtime constraints.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
index 965407e51..7a41d47c3 100644
--- a/lib/dispatchcloud/container/queue.go
+++ b/lib/dispatchcloud/container/queue.go
@@ -211,9 +211,41 @@ func (cq *Queue) Update() error {
func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
it, err := cq.chooseType(&ctr)
- if err != nil {
- // FIXME: throttle warnings, cancel after timeout
- cq.logger.Warnf("cannot run %s", &ctr)
+ if err != nil && (ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked) {
+ errorString := err.Error()
+ cq.logger.WithField("ContainerUUID", ctr.UUID).Warn("cancel container with no suitable instance type")
+ go func() {
+ var err error
+ defer func() {
+ if err == nil {
+ return
+ }
+ // On failure, check current container
+ // state, and don't log the error if
+ // the failure came from losing a
+ // race.
+ var latest arvados.Container
+ cq.client.RequestAndDecode(&latest, "GET", "arvados/v1/containers/"+ctr.UUID, nil, map[string][]string{"select": {"state"}})
+ if latest.State == arvados.ContainerStateCancelled {
+ return
+ }
+ cq.logger.WithField("ContainerUUID", ctr.UUID).WithError(err).Warn("error while trying to cancel unsatisfiable container")
+ }()
+ if ctr.State == arvados.ContainerStateQueued {
+ err = cq.Lock(ctr.UUID)
+ if err != nil {
+ return
+ }
+ }
+ err = cq.setRuntimeError(ctr.UUID, errorString)
+ if err != nil {
+ return
+ }
+ err = cq.Cancel(ctr.UUID)
+ if err != nil {
+ return
+ }
+ }()
return
}
cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
@@ -229,6 +261,18 @@ func (cq *Queue) Unlock(uuid string) error {
return cq.apiUpdate(uuid, "unlock")
}
+// setRuntimeError sets runtime_status["error"] to the given value.
+// Container should already have state==Locked or Running.
+func (cq *Queue) setRuntimeError(uuid, errorString string) error {
+ return cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]map[string]interface{}{
+ "container": {
+ "runtime_status": {
+ "error": errorString,
+ },
+ },
+ })
+}
+
// Cancel cancels the given container.
func (cq *Queue) Cancel(uuid string) error {
err := cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
diff --git a/lib/dispatchcloud/container/queue_test.go b/lib/dispatchcloud/container/queue_test.go
index 9d2f83090..a84497424 100644
--- a/lib/dispatchcloud/container/queue_test.go
+++ b/lib/dispatchcloud/container/queue_test.go
@@ -5,6 +5,7 @@
package container
import (
+ "errors"
"sync"
"testing"
"time"
@@ -24,9 +25,18 @@ var _ = check.Suite(&IntegrationSuite{})
type IntegrationSuite struct{}
-func (*IntegrationSuite) TestControllerBackedQueue(c *check.C) {
+func (suite *IntegrationSuite) TearDownTest(c *check.C) {
+ err := arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+ c.Check(err, check.IsNil)
+}
+
+func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
+ typeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ return arvados.InstanceType{Name: "testType"}, nil
+ }
+
client := arvados.NewClientFromEnv()
- cq := NewQueue(logrus.StandardLogger(), nil, testTypeChooser, client)
+ cq := NewQueue(logrus.StandardLogger(), nil, typeChooser, client)
err := cq.Update()
c.Check(err, check.IsNil)
@@ -77,6 +87,36 @@ func (*IntegrationSuite) TestControllerBackedQueue(c *check.C) {
c.Check(err, check.ErrorMatches, `.*State cannot change from Complete to Cancelled.*`)
}
-func testTypeChooser(ctr *arvados.Container) (arvados.InstanceType, error) {
- return arvados.InstanceType{Name: "testType"}, nil
+func (suite *IntegrationSuite) TestCancelIfNoInstanceType(c *check.C) {
+ errorTypeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
+ return arvados.InstanceType{}, errors.New("no suitable instance type")
+ }
+
+ client := arvados.NewClientFromEnv()
+ cq := NewQueue(logrus.StandardLogger(), nil, errorTypeChooser, client)
+
+ var ctr arvados.Container
+ err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
+ c.Check(err, check.IsNil)
+ c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
+
+ cq.Update()
+
+ // Wait for the cancel operation to take effect. Container
+ // will have state=Cancelled or just disappear from the queue.
+ suite.waitfor(c, time.Second, func() bool {
+ err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
+ return err == nil && ctr.State == arvados.ContainerStateCancelled
+ })
+ c.Check(ctr.RuntimeStatus["error"], check.Equals, `no suitable instance type`)
+}
+
+func (suite *IntegrationSuite) waitfor(c *check.C, timeout time.Duration, fn func() bool) {
+ defer func() {
+ c.Check(fn(), check.Equals, true)
+ }()
+ deadline := time.Now().Add(timeout)
+ for !fn() && time.Now().Before(deadline) {
+ time.Sleep(timeout / 1000)
+ }
}
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go
index 02a0d76de..fb095481b 100644
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -8,21 +8,22 @@ import "time"
// Container is an arvados#container resource.
type Container struct {
- UUID string `json:"uuid"`
- CreatedAt time.Time `json:"created_at"`
- Command []string `json:"command"`
- ContainerImage string `json:"container_image"`
- Cwd string `json:"cwd"`
- Environment map[string]string `json:"environment"`
- LockedByUUID string `json:"locked_by_uuid"`
- Mounts map[string]Mount `json:"mounts"`
- Output string `json:"output"`
- OutputPath string `json:"output_path"`
- Priority int64 `json:"priority"`
- RuntimeConstraints RuntimeConstraints `json:"runtime_constraints"`
- State ContainerState `json:"state"`
- SchedulingParameters SchedulingParameters `json:"scheduling_parameters"`
- ExitCode int `json:"exit_code"`
+ UUID string `json:"uuid"`
+ CreatedAt time.Time `json:"created_at"`
+ Command []string `json:"command"`
+ ContainerImage string `json:"container_image"`
+ Cwd string `json:"cwd"`
+ Environment map[string]string `json:"environment"`
+ LockedByUUID string `json:"locked_by_uuid"`
+ Mounts map[string]Mount `json:"mounts"`
+ Output string `json:"output"`
+ OutputPath string `json:"output_path"`
+ Priority int64 `json:"priority"`
+ RuntimeConstraints RuntimeConstraints `json:"runtime_constraints"`
+ State ContainerState `json:"state"`
+ SchedulingParameters SchedulingParameters `json:"scheduling_parameters"`
+ ExitCode int `json:"exit_code"`
+ RuntimeStatus map[string]interface{} `json:"runtime_status"`
}
// Container is an arvados#container resource.
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list