[ARVADOS] updated: 1.1.4-363-gc914354
Git user
git at public.curoverse.com
Wed Jun 6 13:15:56 EDT 2018
Summary of changes:
apps/workbench/test/integration_helper.rb | 2 ++
build/run-tests.sh | 8 +++--
sdk/go/keepclient/keepclient.go | 1 +
sdk/go/keepclient/keepclient_test.go | 23 ++++++++++----
sdk/go/keepclient/support.go | 3 ++
services/api/app/models/container.rb | 8 ++---
services/api/app/models/container_request.rb | 2 --
services/api/lib/update_priority.rb | 38 +++++++++++++++++++++++
services/keepproxy/keepproxy.go | 9 ++++++
services/keepproxy/keepproxy_test.go | 46 ++++++++++++++++++++++------
10 files changed, 113 insertions(+), 27 deletions(-)
create mode 100644 services/api/lib/update_priority.rb
discards c02297f8766827d827c34260f75e438e806e9c9e (commit)
discards c4aaa3492128b1f438d4992223f65ddf0aec084d (commit)
via c9143544609d90da33eb3c2d566fc5d6a25188b2 (commit)
via d5dd5dc2838fe420c8f835975bad7052a0df55e5 (commit)
via 0809440ad583a2556c0f97fd000be9e9b7f94eb5 (commit)
via 7654764e5192fc8d34e612a6ee6608fad33a91c2 (commit)
via 0eedf70afa34167275d2135837e866b13fac4178 (commit)
via cae5f7def9dae9caf9056bc3bb77ca801cfa2229 (commit)
via e80efb5caae2acbe50ea8a1bd1151b0d2a67c1c9 (commit)
via 7b7950766b7a77cd6c2d6ecee7603dd7394c4144 (commit)
via 24af6ae1b23a4fd82c816d077c0178f0d75db2d7 (commit)
via 4869ae0f12b5a4a293c66f7304483fee1addae52 (commit)
via 7e6ac5a4967614cbe59ed5c0ec41c8be4d4cff4d (commit)
via dd97299b6b3ce82b6a57e2838fbe3f81bb51f140 (commit)
via ed7698518d7eabea0f4e86acfb0660765fe31071 (commit)
This update added new revisions after undoing existing revisions. That is
to say, the old revision is not a strict subset of the new revision. This
situation occurs when you --force push a change and generate a repository
containing something like this:
 * -- * -- B -- O -- O -- O (c02297f8766827d827c34260f75e438e806e9c9e)
            \
             N -- N -- N (c9143544609d90da33eb3c2d566fc5d6a25188b2)
When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit c9143544609d90da33eb3c2d566fc5d6a25188b2
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Jun 6 13:14:26 2018 -0400
13427: Handle same backend device mounted RW in multiple places.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index 92ee3c2..5aad8e6 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -576,11 +576,12 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
}
})
- // Servers and mounts (with or without existing
+ // Servers/mounts/devices (with or without existing
// replicas) that are part of the best achievable
// layout for this storage class.
wantSrv := map[*KeepService]bool{}
wantMnt := map[*KeepMount]bool{}
+ wantDev := map[string]bool{}
// Positions (with existing replicas) that have been
// protected (via unsafeToDelete) to ensure we don't
// reduce replication below desired level when
@@ -592,6 +593,12 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
// and returns true if all requirements are met.
trySlot := func(i int) bool {
slot := slots[i]
+ if wantDev[slot.mnt.DeviceID] {
+ // Already allocated a replica to this
+ // backend device, possibly on a
+ // different server.
+ return false
+ }
if len(protMnt) < desired && slot.repl != nil {
unsafeToDelete[slot.repl.Mtime] = true
protMnt[slot.mnt] = true
@@ -600,6 +607,9 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
slots[i].want = true
wantSrv[slot.mnt.KeepService] = true
wantMnt[slot.mnt] = true
+ if slot.mnt.DeviceID != "" {
+ wantDev[slot.mnt.DeviceID] = true
+ }
}
return len(protMnt) >= desired && len(wantMnt) >= desired
}
@@ -644,6 +654,16 @@ func (bal *Balancer) balanceBlock(blkid arvados.SizedDigest, blk *BlockState) ba
cs.unachievable = true
classState[class] = cs
}
+
+ // Avoid deleting wanted replicas from devices that
+ // are mounted on multiple servers -- even if they
+ // haven't already been added to unsafeToDelete
+ // because the servers report different Mtimes.
+ for _, slot := range slots {
+ if slot.repl != nil && wantDev[slot.mnt.DeviceID] {
+ unsafeToDelete[slot.repl.Mtime] = true
+ }
+ }
}
// TODO: If multiple replicas are trashable, prefer the oldest
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index 94c4cd2..602c42e 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -259,6 +259,57 @@ func (bal *balancerSuite) TestDedupDevices(c *check.C) {
shouldPull: slots{2}})
}
+func (bal *balancerSuite) TestDeviceRWMountedByMultipleServers(c *check.C) {
+ bal.srvs[0].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[9].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+ // block 0 belongs on servers 3 and e, which have different
+ // device IDs.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1},
+ shouldPull: slots{0}})
+ // block 1 belongs on servers 0 and 9, which both report
+ // having a replica, but the replicas are on the same device
+ // ID -- so we should pull to the third position (7).
+ bal.try(c, tester{
+ known: 1,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1},
+ shouldPull: slots{2}})
+ // block 1 can be pulled to the doubly-mounted device, but the
+ // pull should only be done on the first of the two servers.
+ bal.try(c, tester{
+ known: 1,
+ desired: map[string]int{"default": 2},
+ current: slots{2},
+ shouldPull: slots{0}})
+ // block 0 has one replica on a single device mounted on two
+ // servers (e,9 at positions 1,9). Trashing the replica on 9
+ // would lose the block.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1, 9},
+ shouldPull: slots{0}})
+ // block 0 is overreplicated, but the second and third
+ // replicas are the same replica according to DeviceID
+ // (despite different Mtimes). Don't trash the third replica.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 9}})
+ // block 0 is overreplicated; the third and fifth replicas are
+ // extra, but the fourth is another view of the second and
+ // shouldn't be trashed.
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{0, 1, 5, 9, 12},
+ shouldTrash: slots{5, 12}})
+}
+
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
// probe order. For these tests we give it two mounts, one
commit d5dd5dc2838fe420c8f835975bad7052a0df55e5
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Mon Jun 4 22:10:37 2018 -0400
13427: Ignore readonly devices mounted read-write elsewhere.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/keep-balance/balance.go b/services/keep-balance/balance.go
index d6a2dde..92ee3c2 100644
--- a/services/keep-balance/balance.go
+++ b/services/keep-balance/balance.go
@@ -95,6 +95,7 @@ func (bal *Balancer) Run(config Config, runOptions RunOptions) (nextRunOptions R
return
}
}
+ bal.dedupDevices()
if err = bal.CheckSanityEarly(&config.Client); err != nil {
return
@@ -169,6 +170,30 @@ func (bal *Balancer) DiscoverKeepServices(c *arvados.Client, okTypes []string) e
})
}
+func (bal *Balancer) dedupDevices() {
+ rwdev := map[string]*KeepService{}
+ for _, srv := range bal.KeepServices {
+ for _, mnt := range srv.mounts {
+ if !mnt.ReadOnly && mnt.DeviceID != "" {
+ rwdev[mnt.DeviceID] = srv
+ }
+ }
+ }
+ // Drop the readonly mounts whose device is mounted RW
+ // elsewhere.
+ for _, srv := range bal.KeepServices {
+ var dedup []*KeepMount
+ for _, mnt := range srv.mounts {
+ if mnt.ReadOnly && rwdev[mnt.DeviceID] != nil {
+ bal.logf("skipping srv %s readonly mount %q because same device %q is mounted read-write on srv %s", srv, mnt.UUID, mnt.DeviceID, rwdev[mnt.DeviceID])
+ } else {
+ dedup = append(dedup, mnt)
+ }
+ }
+ srv.mounts = dedup
+ }
+}
+
// CheckSanityEarly checks for configuration and runtime errors that
// can be detected before GetCurrentState() and ComputeChangeSets()
// are called.
diff --git a/services/keep-balance/balance_test.go b/services/keep-balance/balance_test.go
index cfdd47f..94c4cd2 100644
--- a/services/keep-balance/balance_test.go
+++ b/services/keep-balance/balance_test.go
@@ -245,6 +245,20 @@ func (bal *balancerSuite) TestDecreaseReplBlockTooNew(c *check.C) {
shouldTrash: slots{2}})
}
+func (bal *balancerSuite) TestDedupDevices(c *check.C) {
+ bal.srvs[3].mounts[0].KeepMount.ReadOnly = true
+ bal.srvs[3].mounts[0].KeepMount.DeviceID = "abcdef"
+ bal.srvs[14].mounts[0].KeepMount.DeviceID = "abcdef"
+ c.Check(len(bal.srvs[3].mounts), check.Equals, 1)
+ bal.dedupDevices()
+ c.Check(len(bal.srvs[3].mounts), check.Equals, 0)
+ bal.try(c, tester{
+ known: 0,
+ desired: map[string]int{"default": 2},
+ current: slots{1},
+ shouldPull: slots{2}})
+}
+
func (bal *balancerSuite) TestChangeStorageClasses(c *check.C) {
// For known blocks 0/1/2/3, server 9 is slot 9/1/14/0 in
// probe order. For these tests we give it two mounts, one
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list