[ARVADOS] created: 1.1.3-68-gda17cdc
Git user
git at public.curoverse.com
Thu Feb 15 18:20:40 EST 2018
at da17cdccd11d66a10cbc3bf7fbd8c84b49d4a67c (commit)
commit da17cdccd11d66a10cbc3bf7fbd8c84b49d4a67c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Thu Feb 15 18:17:52 2018 -0500
12552: Add SLURM niceness calculator.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-dispatch-slurm/priority.go b/services/crunch-dispatch-slurm/priority.go
new file mode 100644
index 0000000..1445d2e
--- /dev/null
+++ b/services/crunch-dispatch-slurm/priority.go
@@ -0,0 +1,56 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import "git.curoverse.com/arvados.git/sdk/go/arvados"
+
+type slurmJob struct {
+ ctr *arvados.Container // NOTE(review): presumably the Arvados container this SLURM job runs; not read by wantNice -- confirm against callers
+ priority int64 // current slurm priority (incorporates nice value)
+ nice int64 // current slurm nice value; wantNice treats priority+nice as the priority the job would have at nice 0
+}
+
+// wantNice calculates appropriate nice values for a set of SLURM
+// jobs. The returned slice will have len(jobs) elements.
+//
+// spread is a non-negative amount of space to leave between adjacent
+// priorities when making adjustments. Generally, increasing spread
+// reduces the total number of adjustments made. A smaller spread
+// produces lower nice values, which is useful for old SLURM versions
+// with a limited "nice" range and for sites where SLURM is also
+// running non-Arvados jobs with low nice values.
+func wantNice(jobs []slurmJob, spread int64) []int64 {
+ if len(jobs) == 0 {
+ return nil
+ }
+ renice := make([]int64, len(jobs))
+
+ // highest usable priority (without going out of order)
+ var target int64
+ for i, job := range jobs {
+ if i == 0 {
+ // renice[0] is always zero, so our highest
+ // priority container gets the highest
+ // possible slurm priority.
+ target = job.priority + job.nice
+ } else if space := target - job.priority; space >= 0 && space < spread*10 {
+ // Ordering is correct, and interval isn't too
+ // large. Leave existing nice value alone.
+ renice[i] = job.nice
+ target = job.priority
+ } else {
+ target -= spread
+ if possible := job.priority + job.nice; target > possible {
+ // renice[i] is already 0, that's the
+ // best we can do
+ target = possible
+ } else {
+ renice[i] = possible - target
+ }
+ }
+ target--
+ }
+ return renice
+}
diff --git a/services/crunch-dispatch-slurm/priority_test.go b/services/crunch-dispatch-slurm/priority_test.go
new file mode 100644
index 0000000..a2da4d2
--- /dev/null
+++ b/services/crunch-dispatch-slurm/priority_test.go
@@ -0,0 +1,141 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+ . "gopkg.in/check.v1"
+)
+
+var _ = Suite(&PrioritySuite{}) // hand a PrioritySuite instance to gocheck's Suite so its Test* methods are run
+
+type PrioritySuite struct{} // stateless receiver for the wantNice tests
+
+func (s *PrioritySuite) TestReniceCorrect(c *C) {
+ for _, test := range []struct {
+ spread int64
+ in []slurmJob
+ out []int64
+ }{
+ {
+ 0,
+ nil,
+ nil,
+ },
+ {
+ 0,
+ []slurmJob{},
+ nil,
+ },
+ {
+ 10,
+ []slurmJob{{priority: 4294000111, nice: 10000}},
+ []int64{0},
+ },
+ {
+ 10,
+ []slurmJob{
+ {priority: 4294000111, nice: 10000},
+ {priority: 4294000111, nice: 10000},
+ {priority: 4294000111, nice: 10000},
+ {priority: 4294000111, nice: 10000},
+ },
+ []int64{0, 11, 22, 33},
+ },
+ { // smaller spread than necessary, but correctly ordered => leave nice alone
+ 10,
+ []slurmJob{
+ {priority: 4294000113, nice: 0},
+ {priority: 4294000112, nice: 1},
+ {priority: 4294000111, nice: 99},
+ },
+ []int64{0, 1, 99},
+ },
+ { // larger spread than necessary, but less than 10x => leave nice alone
+ 10,
+ []slurmJob{
+ {priority: 4294000144, nice: 0},
+ {priority: 4294000122, nice: 22},
+ {priority: 4294000111, nice: 33},
+ },
+ []int64{0, 22, 33},
+ },
+ { // > 10x spread => reduce nice to achieve spread=10
+ 10,
+ []slurmJob{
+ {priority: 4000, nice: 0}, // max pri 4000
+ {priority: 3000, nice: 999}, // max pri 3999
+ {priority: 2000, nice: 1998}, // max pri 3998
+ },
+ []int64{0, 10, 20},
+ },
+ { // > 10x spread, but spread=10 is impossible without negative nice
+ 10,
+ []slurmJob{
+ {priority: 4000, nice: 0}, // max pri 4000
+ {priority: 3000, nice: 500}, // max pri 3500
+ {priority: 2000, nice: 2000}, // max pri 4000
+ },
+ []int64{0, 0, 511},
+ },
+ { // reorder
+ 10,
+ []slurmJob{
+ {priority: 4000, nice: 0}, // max pri 4000
+ {priority: 5000, nice: 0}, // max pri 5000
+ {priority: 6000, nice: 0}, // max pri 6000
+ },
+ []int64{0, 1011, 2022},
+ },
+ { // zero spread
+ 0,
+ []slurmJob{
+ {priority: 4000, nice: 0}, // max pri 4000
+ {priority: 5000, nice: 0}, // max pri 5000
+ {priority: 6000, nice: 0}, // max pri 6000
+ {priority: 3000, nice: 0}, // max pri 3000
+ },
+ []int64{0, 1001, 2002, 0},
+ },
+ } {
+ c.Logf("spread=%d %+v -> %+v", test.spread, test.in, test.out)
+ c.Check(wantNice(test.in, test.spread), DeepEquals, test.out)
+
+ if len(test.in) == 0 {
+ continue
+ }
+ // After making the adjustments, calling wantNice
+ // again should return the same recommendations.
+ updated := make([]slurmJob, len(test.in))
+ for i, in := range test.in {
+ updated[i].nice = test.out[i]
+ updated[i].priority = in.priority + in.nice - test.out[i]
+ }
+ c.Check(wantNice(updated, test.spread), DeepEquals, test.out)
+ }
+}
+
+// TestReniceChurn drains a queue of 1000 jobs one at a time, applying
+// wantNice's recommendations before each removal, and checks that the
+// total number of renice operations stays below len(jobs)^2/10.
+func (s *PrioritySuite) TestReniceChurn(c *C) {
+	const spread = 10
+	jobs := make([]slurmJob, 1000)
+	for i := range jobs {
+		jobs[i].priority = 4294000000 - int64(i)
+		jobs[i].nice = 10000
+	}
+
+	adjustments := 0
+	// Each pass applies the recommendations to what is left of the
+	// queue, then drops the job at the front.
+	for pending := jobs; len(pending) > 0; pending = pending[1:] {
+		for i, want := range wantNice(pending, spread) {
+			job := &pending[i]
+			if want != job.nice {
+				job.priority += job.nice - want
+				job.nice = want
+				adjustments++
+			}
+		}
+	}
+	c.Logf("processed queue of %d with %d renice ops", len(jobs), adjustments)
+	limit := len(jobs) * len(jobs) / 10
+	c.Check(adjustments < limit, Equals, true)
+}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list