[ARVADOS] created: 1.1.4-363-g652b52a

Git user git at public.curoverse.com
Thu Jun 7 13:55:43 EDT 2018


        at  652b52a64f9f697394e81bdf1f91352a6131d0db (commit)


commit 652b52a64f9f697394e81bdf1f91352a6131d0db
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date:   Thu Jun 7 14:51:26 2018 -0300

    13581: Adds ErrConstraintsNotSatisfiable error reason to user logs.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>

diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 9e3baab..f7c53c8 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -219,19 +219,20 @@ func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []strin
 	}
 }
 
-func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
+func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, string, error) {
 	var args []string
+	var reason string
 	args = append(args, disp.SbatchArguments...)
 	args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue))
 
 	if disp.cluster == nil {
 		// no instance types configured
 		args = append(args, disp.slurmConstraintArgs(container)...)
-	} else if it, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured {
+	} else if it, reason, err := dispatchcloud.ChooseInstanceType(disp.cluster, &container); err == dispatchcloud.ErrInstanceTypesNotConfigured {
 		// ditto
 		args = append(args, disp.slurmConstraintArgs(container)...)
 	} else if err != nil {
-		return nil, err
+		return nil, reason, err
 	} else {
 		// use instancetype constraint instead of slurm mem/cpu/tmp specs
 		args = append(args, "--constraint=instancetype="+it.Name)
@@ -241,10 +242,10 @@ func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error
 		args = append(args, "--partition="+strings.Join(container.SchedulingParameters.Partitions, ","))
 	}
 
-	return args, nil
+	return args, reason, nil
 }
 
-func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []string) error {
+func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []string) (string, error) {
 	// append() here avoids modifying crunchRunCommand's
 	// underlying array, which is shared with other goroutines.
 	crArgs := append([]string(nil), crunchRunCommand...)
@@ -254,12 +255,12 @@ func (disp *Dispatcher) submit(container arvados.Container, crunchRunCommand []s
 	disp.sqCheck.L.Lock()
 	defer disp.sqCheck.L.Unlock()
 
-	sbArgs, err := disp.sbatchArgs(container)
+	sbArgs, reason, err := disp.sbatchArgs(container)
 	if err != nil {
-		return err
+		return reason, err
 	}
 	log.Printf("running sbatch %+q", sbArgs)
-	return disp.slurm.Batch(crScript, sbArgs)
+	return reason, disp.slurm.Batch(crScript, sbArgs)
 }
 
 // Submit a container to the slurm queue (or resume monitoring if it's
@@ -272,10 +273,10 @@ func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 
 	if ctr.State == dispatch.Locked && !disp.sqCheck.HasUUID(ctr.UUID) {
 		log.Printf("Submitting container %s to slurm", ctr.UUID)
-		if err := disp.submit(ctr, disp.CrunchRunCommand); err != nil {
+		if reason, err := disp.submit(ctr, disp.CrunchRunCommand); err != nil {
 			var text string
 			if err == dispatchcloud.ErrConstraintsNotSatisfiable {
-				text = fmt.Sprintf("cannot run container %s: %s", ctr.UUID, err)
+				text = fmt.Sprintf("cannot run container %s: %s (%s)", ctr.UUID, err, reason)
 				disp.UpdateState(ctr.UUID, dispatch.Cancelled)
 			} else {
 				text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index 85617cf..2753477 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -353,7 +353,7 @@ func (s *StubbedSuite) TestSbatchArgs(c *C) {
 		c.Logf("%#v", defaults)
 		s.disp.SbatchArguments = defaults
 
-		args, err := s.disp.sbatchArgs(container)
+		args, _, err := s.disp.sbatchArgs(container)
 		c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--nice=10000", "--mem=239", "--cpus-per-task=2", "--tmp=0"))
 		c.Check(err, IsNil)
 	}
@@ -369,6 +369,7 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) {
 	for _, trial := range []struct {
 		types      []arvados.InstanceType
 		sbatchArgs []string
+		reason     string
 		err        error
 	}{
 		// Choose node type => use --constraint arg
@@ -391,14 +392,16 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) {
 			types: []arvados.InstanceType{
 				{Name: "a1.tiny", Price: 0.02, RAM: 128000000, VCPUs: 1},
 			},
-			err: dispatchcloud.ErrConstraintsNotSatisfiable,
+			reason: "max RAM available: 128000000",
+			err:    dispatchcloud.ErrConstraintsNotSatisfiable,
 		},
 	} {
 		c.Logf("%#v", trial)
 		s.disp.cluster = &arvados.Cluster{InstanceTypes: trial.types}
 
-		args, err := s.disp.sbatchArgs(container)
+		args, reason, err := s.disp.sbatchArgs(container)
 		c.Check(err, Equals, trial.err)
+		c.Check(reason, Equals, trial.reason)
 		if trial.err == nil {
 			c.Check(args, DeepEquals, append([]string{"--job-name=123", "--nice=10000"}, trial.sbatchArgs...))
 		}
@@ -413,7 +416,7 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) {
 		Priority:             1,
 	}
 
-	args, err := s.disp.sbatchArgs(container)
+	args, _, err := s.disp.sbatchArgs(container)
 	c.Check(args, DeepEquals, []string{
 		"--job-name=123", "--nice=10000",
 		"--mem=239", "--cpus-per-task=1", "--tmp=0",

commit 30e06a4b88088ab23b7082965088a770c799dff9
Author: Lucas Di Pentima <ldipentima at veritasgenetics.com>
Date:   Thu Jun 7 14:11:00 2018 -0300

    13581: Updates ChooseInstanceType func to return error reasons.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima at veritasgenetics.com>

diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go
index 2ca4050..bfa6ec7 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/lib/dispatchcloud/node_size.go
@@ -6,6 +6,7 @@ package dispatchcloud
 
 import (
 	"errors"
+	"fmt"
 	"log"
 	"os/exec"
 	"strings"
@@ -22,7 +23,7 @@ var (
 
 // ChooseInstanceType returns the cheapest available
 // arvados.InstanceType big enough to run ctr.
-func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvados.InstanceType, err error) {
+func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvados.InstanceType, errReason string, err error) {
 	if len(cc.InstanceTypes) == 0 {
 		err = ErrInstanceTypesNotConfigured
 		return
@@ -40,13 +41,21 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
 	needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM
 	needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
 
+	maxSpecsAvailable := make(map[string]int64)
+
 	err = ErrConstraintsNotSatisfiable
 	for _, it := range cc.InstanceTypes {
+		maxSpecsAvailable["Scratch"] = maxInt64(maxSpecsAvailable["Scratch"], it.Scratch)
+		maxSpecsAvailable["VCPUs"] = maxInt64(maxSpecsAvailable["VCPUs"], int64(it.VCPUs))
+		maxSpecsAvailable["RAM"] = maxInt64(maxSpecsAvailable["RAM"], it.RAM)
 		switch {
 		case err == nil && it.Price > best.Price:
 		case it.Scratch < needScratch:
+			errReason = fmt.Sprintf("max scratch space available: %d", maxSpecsAvailable["Scratch"])
 		case it.RAM < needRAM:
+			errReason = fmt.Sprintf("max RAM available: %d", maxSpecsAvailable["RAM"])
 		case it.VCPUs < needVCPUs:
+			errReason = fmt.Sprintf("max VCPUs available: %d", maxSpecsAvailable["VCPUs"])
 		case it.Price == best.Price && (it.RAM < best.RAM || it.VCPUs < best.VCPUs):
 			// Equal price, but worse specs
 		default:
@@ -55,6 +64,9 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
 			err = nil
 		}
 	}
+	if err == nil {
+		errReason = ""
+	}
 	return
 }
 
@@ -115,3 +127,10 @@ func slurmKludge(features []string) {
 		log.Printf("error: scontrol: %s (output was %q)", err, out)
 	}
 }
+
+func maxInt64(a, b int64) int64 {
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/lib/dispatchcloud/node_size_test.go b/lib/dispatchcloud/node_size_test.go
index 0c02a0e..0c6e5d6 100644
--- a/lib/dispatchcloud/node_size_test.go
+++ b/lib/dispatchcloud/node_size_test.go
@@ -16,7 +16,7 @@ const GiB = int64(1 << 30)
 type NodeSizeSuite struct{}
 
 func (*NodeSizeSuite) TestChooseNotConfigured(c *check.C) {
-	_, err := ChooseInstanceType(&arvados.Cluster{}, &arvados.Container{
+	_, _, err := ChooseInstanceType(&arvados.Cluster{}, &arvados.Container{
 		RuntimeConstraints: arvados.RuntimeConstraints{
 			RAM:   1234567890,
 			VCPUs: 2,
@@ -27,12 +27,13 @@ func (*NodeSizeSuite) TestChooseNotConfigured(c *check.C) {
 
 func (*NodeSizeSuite) TestChooseUnsatisfiable(c *check.C) {
 	checkUnsatisfiable := func(ctr *arvados.Container) {
-		_, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: []arvados.InstanceType{
+		_, reason, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: []arvados.InstanceType{
 			{Price: 1.1, RAM: 1000000000, VCPUs: 2, Name: "small1"},
 			{Price: 2.2, RAM: 2000000000, VCPUs: 4, Name: "small2"},
 			{Price: 4.4, RAM: 4000000000, VCPUs: 8, Name: "small4", Scratch: GiB},
 		}}, ctr)
 		c.Check(err, check.Equals, ErrConstraintsNotSatisfiable)
+		c.Check(reason, check.Not(check.Equals), "")
 	}
 
 	for _, rc := range []arvados.RuntimeConstraints{
@@ -74,7 +75,7 @@ func (*NodeSizeSuite) TestChoose(c *check.C) {
 			{Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
 		},
 	} {
-		best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu}, &arvados.Container{
+		best, _, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu}, &arvados.Container{
 			Mounts: map[string]arvados.Mount{
 				"/tmp": {Kind: "tmp", Capacity: 2 * GiB},
 			},

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list