[ARVADOS] created: 1.1.3-68-gda17cdc

Thu Feb 15 18:20:40 EST 2018

at  da17cdccd11d66a10cbc3bf7fbd8c84b49d4a67c (commit)


commit da17cdccd11d66a10cbc3bf7fbd8c84b49d4a67c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Feb 15 18:17:52 2018 -0500

    12552: Add SLURM niceness calculator.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/crunch-dispatch-slurm/priority.go b/services/crunch-dispatch-slurm/priority.go
new file mode 100644
index 0000000..1445d2e
--- /dev/null
+++ b/services/crunch-dispatch-slurm/priority.go
@@ -0,0 +1,56 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import "git.curoverse.com/arvados.git/sdk/go/arvados"
+
+type slurmJob struct { // one SLURM job, as input to wantNice
+	ctr      *arvados.Container
+	priority int64 // current slurm priority (incorporates nice value; wantNice treats priority+nice as the priority at nice=0)
+	nice     int64 // current slurm nice value
+}
+
+// wantNice calculates appropriate nice values for a set of SLURM
+// jobs, listed highest desired priority first. The returned slice
+// will have len(jobs) elements (nil if jobs is empty).
+//
+// spread is a non-negative amount of space to leave between adjacent priorities
+// when making adjustments. Generally, increasing spread reduces the total number
+// of adjustments made. A smaller spread produces lower nice values, which is
+// useful for old SLURM versions with a limited "nice" range and for sites where
+// SLURM is also running non-Arvados jobs with low nice values.
+func wantNice(jobs []slurmJob, spread int64) []int64 {
+	if len(jobs) == 0 {
+		return nil
+	}
+	renice := make([]int64, len(jobs))
+
+	// highest priority the current job may end up with (without going out of order)
+	var target int64
+	for i, job := range jobs {
+		if i == 0 {
+			// renice[0] is always zero, so our highest
+			// priority container gets the highest
+			// possible slurm priority (its priority at nice=0).
+			target = job.priority + job.nice
+		} else if space := target - job.priority; space >= 0 && space < spread*10 {
+			// Ordering is correct (job is not above target), and
+			// interval is under 10x spread. Leave existing nice value alone.
+			renice[i] = job.nice
+			target = job.priority
+		} else {
+			target -= spread // open up the requested gap below the previous job
+			if possible := job.priority + job.nice; target > possible {
+				// renice[i] is already 0, that's the best we can
+				// do; lower target to what this job can reach
+				target = possible
+			} else {
+				renice[i] = possible - target // nice that puts this job exactly at target
+			}
+		}
+		target-- // the next job must rank strictly below this one
+	}
+	return renice
+}
diff --git a/services/crunch-dispatch-slurm/priority_test.go b/services/crunch-dispatch-slurm/priority_test.go
new file mode 100644
index 0000000..a2da4d2
--- /dev/null
+++ b/services/crunch-dispatch-slurm/priority_test.go
@@ -0,0 +1,141 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+	. "gopkg.in/check.v1"
+)
+
+var _ = Suite(&PrioritySuite{}) // hand the suite to gocheck's Suite (dot-imported from gopkg.in/check.v1)
+
+type PrioritySuite struct{} // stateless receiver for the priority tests in this file
+
+func (s *PrioritySuite) TestReniceCorrect(c *C) { // table-driven check of wantNice results
+	for _, test := range []struct {
+		spread int64      // spread argument passed to wantNice
+		in     []slurmJob // jobs passed to wantNice
+		out    []int64    // expected return value
+	}{
+		{ // nil input => nil result
+			0,
+			nil,
+			nil,
+		},
+		{ // empty non-nil input => nil result
+			0,
+			[]slurmJob{},
+			nil,
+		},
+		{ // single job => nice drops to zero
+			10,
+			[]slurmJob{{priority: 4294000111, nice: 10000}},
+			[]int64{0},
+		},
+		{ // identical jobs => first gets zero, the rest are staggered spread+1 apart
+			10,
+			[]slurmJob{
+				{priority: 4294000111, nice: 10000},
+				{priority: 4294000111, nice: 10000},
+				{priority: 4294000111, nice: 10000},
+				{priority: 4294000111, nice: 10000},
+			},
+			[]int64{0, 11, 22, 33},
+		},
+		{ // smaller spread than necessary, but correctly ordered => leave nice alone
+			10,
+			[]slurmJob{
+				{priority: 4294000113, nice: 0},
+				{priority: 4294000112, nice: 1},
+				{priority: 4294000111, nice: 99},
+			},
+			[]int64{0, 1, 99},
+		},
+		{ // larger spread than necessary, but less than 10x => leave nice alone
+			10,
+			[]slurmJob{
+				{priority: 4294000144, nice: 0},
+				{priority: 4294000122, nice: 22},
+				{priority: 4294000111, nice: 33},
+			},
+			[]int64{0, 22, 33},
+		},
+		{ // > 10x spread => reduce nice to achieve spread=10
+			10,
+			[]slurmJob{
+				{priority: 4000, nice: 0},    // max pri 4000
+				{priority: 3000, nice: 999},  // max pri 3999
+				{priority: 2000, nice: 1998}, // max pri 3998
+			},
+			[]int64{0, 10, 20},
+		},
+		{ // > 10x spread, but spread=10 is impossible without negative nice
+			10,
+			[]slurmJob{
+				{priority: 4000, nice: 0},    // max pri 4000
+				{priority: 3000, nice: 500},  // max pri 3500
+				{priority: 2000, nice: 2000}, // max pri 4000
+			},
+			[]int64{0, 0, 511},
+		},
+		{ // reorder
+			10,
+			[]slurmJob{
+				{priority: 4000, nice: 0}, // max pri 4000
+				{priority: 5000, nice: 0}, // max pri 5000
+				{priority: 6000, nice: 0}, // max pri 6000
+			},
+			[]int64{0, 1011, 2022},
+		},
+		{ // zero spread
+			0,
+			[]slurmJob{
+				{priority: 4000, nice: 0}, // max pri 4000
+				{priority: 5000, nice: 0}, // max pri 5000
+				{priority: 6000, nice: 0}, // max pri 6000
+				{priority: 3000, nice: 0}, // max pri 3000
+			},
+			[]int64{0, 1001, 2002, 0},
+		},
+	} {
+		c.Logf("spread=%d %+v -> %+v", test.spread, test.in, test.out)
+		c.Check(wantNice(test.in, test.spread), DeepEquals, test.out)
+
+		if len(test.in) == 0 {
+			continue // no jobs, so there is nothing to re-check
+		}
+		// After applying the recommended nice values, calling wantNice
+		// again should return the same recommendations (no further changes).
+		updated := make([]slurmJob, len(test.in))
+		for i, in := range test.in {
+			updated[i].nice = test.out[i]
+			updated[i].priority = in.priority + in.nice - test.out[i] // priority at nice=0, minus the new nice
+		}
+		c.Check(wantNice(updated, test.spread), DeepEquals, test.out)
+	}
+}
+
+func (s *PrioritySuite) TestReniceChurn(c *C) { // counts renice operations needed while a queue drains
+	const spread = 10
+	jobs := make([]slurmJob, 1000)
+	for i := range jobs {
+		jobs[i] = slurmJob{priority: 4294000000 - int64(i), nice: 10000} // already in order, 1 apart
+	}
+	adjustments := 0
+	queue := jobs // shares jobs' backing array, so updates below persist across iterations
+	for len(queue) > 0 {
+		renice := wantNice(queue, spread)
+		for i := range queue {
+			if renice[i] == queue[i].nice {
+				continue // recommendation matches current nice: no operation needed
+			}
+			queue[i].priority += queue[i].nice - renice[i] // apply the new nice to the simulated job
+			queue[i].nice = renice[i]
+			adjustments++
+		}
+		queue = queue[1:] // remove the head job before the next round
+	}
+	c.Logf("processed queue of %d with %d renice ops", len(jobs), adjustments)
+	c.Check(adjustments < len(jobs)*len(jobs)/10, Equals, true) // fewer than len(jobs)^2/10 operations in total
+}

-----------------------------------------------------------------------


hooks/post-receive
--