[arvados] updated: 2.7.0-5289-g88aedea4fd

git repository hosting git at public.arvados.org
Thu Nov 2 19:47:19 UTC 2023


Summary of changes:
 lib/cloud/ec2/ec2.go                     |  6 ++-
 lib/cloud/ec2/ec2_test.go                | 20 +++++++---
 lib/dispatchcloud/node_size.go           |  6 +--
 lib/dispatchcloud/scheduler/run_queue.go | 66 +++++++++++++++++---------------
 4 files changed, 56 insertions(+), 42 deletions(-)

       via  88aedea4fdf827524c620830ec11681e5cd5f527 (commit)
       via  66cee5a8021e73271650e0997ca7f757e419d169 (commit)
       via  da204dccd3df12b5c885068768f95c84e4703047 (commit)
      from  1028c0630dac2a2bff363da1390bbf942e7fe7ae (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 88aedea4fdf827524c620830ec11681e5cd5f527
Author: Tom Clegg <tom at curii.com>
Date:   Thu Nov 2 15:37:16 2023 -0400

    20978: Add sort-by-scratch fallback.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go
index 0a5a79bc70..802bc65c28 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/lib/dispatchcloud/node_size.go
@@ -178,9 +178,9 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) ([]arvados.
 			// if same price and RAM, prefer more VCPUs
 			return types[i].VCPUs > types[j].VCPUs
 		}
-		if types[i].VCPUs != types[j].VCPUs {
-			// if same price and RAM, prefer more VCPUs
-			return types[i].VCPUs > types[j].VCPUs
+		if types[i].Scratch != types[j].Scratch {
+			// if same price and RAM and VCPUs, prefer more scratch
+			return types[i].Scratch > types[j].Scratch
 		}
 		// no preference, just sort the same way each time
 		return types[i].Name < types[j].Name

commit 66cee5a8021e73271650e0997ca7f757e419d169
Author: Tom Clegg <tom at curii.com>
Date:   Thu Nov 2 15:35:43 2023 -0400

    20978: Rearrange large if-else sequence for clarity.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index 8264c5ef0a..2f1f175890 100644
--- a/lib/dispatchcloud/scheduler/run_queue.go
+++ b/lib/dispatchcloud/scheduler/run_queue.go
@@ -235,9 +235,28 @@ tryrun:
 			}
 			trying++
 			if unallocOK {
+				// We have a suitable instance type,
+				// so mark it as allocated, and try to
+				// start the container.
 				unalloc[unallocType]--
 				logger = logger.WithField("InstanceType", unallocType)
-			} else if sch.pool.AtQuota() {
+				if dontstart[unallocType] {
+					// We already tried & failed to start
+					// a higher-priority container on the
+					// same instance type. Don't let this
+					// one sneak in ahead of it.
+				} else if sch.pool.KillContainer(ctr.UUID, "about to start") {
+					logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
+				} else if sch.pool.StartContainer(unallocType, ctr) {
+					logger.Trace("StartContainer => true")
+				} else {
+					logger.Trace("StartContainer => false")
+					containerAllocatedWorkerBootingCount += 1
+					dontstart[unallocType] = true
+				}
+				continue
+			}
+			if sch.pool.AtQuota() {
 				// Don't let lower-priority containers
 				// starve this one by keeping
 				// idle workers alive on different
@@ -245,7 +264,8 @@ tryrun:
 				logger.Trace("overquota")
 				overquota = sorted[i:]
 				break tryrun
-			} else if !availableOK {
+			}
+			if !availableOK {
 				// Continue trying lower-priority
 				// containers in case they can run on
 				// different instance types that are
@@ -261,40 +281,24 @@ tryrun:
 				// container B now.
 				logger.Trace("all eligible types at capacity")
 				continue
-			} else if logger = logger.WithField("InstanceType", availableType); sch.pool.Create(availableType) {
-				// Success. (Note pool.Create works
-				// asynchronously and does its own
-				// logging about the eventual outcome,
-				// so we don't need to.)
-				logger.Info("creating new instance")
-				// Don't bother trying to start the
-				// container yet -- obviously the
-				// instance will take some time to
-				// boot and become ready.
-				containerAllocatedWorkerBootingCount += 1
-				dontstart[availableType] = true
-				continue
-			} else {
+			}
+			logger = logger.WithField("InstanceType", availableType)
+			if !sch.pool.Create(availableType) {
 				// Failed despite not being at quota,
 				// e.g., cloud ops throttled.
 				logger.Trace("pool declined to create new instance")
 				continue
 			}
-
-			if dontstart[unallocType] {
-				// We already tried & failed to start
-				// a higher-priority container on the
-				// same instance type. Don't let this
-				// one sneak in ahead of it.
-			} else if sch.pool.KillContainer(ctr.UUID, "about to start") {
-				logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
-			} else if sch.pool.StartContainer(unallocType, ctr) {
-				logger.Trace("StartContainer => true")
-			} else {
-				logger.Trace("StartContainer => false")
-				containerAllocatedWorkerBootingCount += 1
-				dontstart[unallocType] = true
-			}
+			// Success. (Note pool.Create works
+			// asynchronously and does its own logging
+			// about the eventual outcome, so we don't
+			// need to.)
+			logger.Info("creating new instance")
+			// Don't bother trying to start the container
+			// yet -- obviously the instance will take
+			// some time to boot and become ready.
+			containerAllocatedWorkerBootingCount += 1
+			dontstart[availableType] = true
 		}
 	}
 

commit da204dccd3df12b5c885068768f95c84e4703047
Author: Tom Clegg <tom at curii.com>
Date:   Thu Nov 2 15:26:56 2023 -0400

    20978: Treat "unsupported instance type" as capacity=0.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go
index 816df48d90..55f9a1e3a3 100644
--- a/lib/cloud/ec2/ec2.go
+++ b/lib/cloud/ec2/ec2.go
@@ -711,7 +711,8 @@ func isErrorSubnetSpecific(err error) bool {
 	code := aerr.Code()
 	return strings.Contains(code, "Subnet") ||
 		code == "InsufficientInstanceCapacity" ||
-		code == "InsufficientVolumeCapacity"
+		code == "InsufficientVolumeCapacity" ||
+		code == "Unsupported"
 }
 
 type ec2QuotaError struct {
@@ -737,7 +738,8 @@ func wrapError(err error, throttleValue *atomic.Value) error {
 		return rateLimitError{error: err, earliestRetry: time.Now().Add(d)}
 	} else if isErrorQuota(err) {
 		return &ec2QuotaError{err}
-	} else if aerr, ok := err.(awserr.Error); ok && aerr != nil && aerr.Code() == "InsufficientInstanceCapacity" {
+	} else if aerr, ok := err.(awserr.Error); ok && (aerr.Code() == "InsufficientInstanceCapacity" ||
+		(aerr.Code() == "Unsupported" && strings.Contains(aerr.Message(), "requested instance type"))) {
 		return &capacityError{err, true}
 	} else if err != nil {
 		throttleValue.Store(time.Duration(0))
diff --git a/lib/cloud/ec2/ec2_test.go b/lib/cloud/ec2/ec2_test.go
index a57fcebf76..6ce5aa3cf9 100644
--- a/lib/cloud/ec2/ec2_test.go
+++ b/lib/cloud/ec2/ec2_test.go
@@ -513,10 +513,18 @@ func (*EC2InstanceSetSuite) TestWrapError(c *check.C) {
 	_, ok = wrapped.(cloud.QuotaError)
 	c.Check(ok, check.Equals, true)
 
-	capacityError := awserr.New("InsufficientInstanceCapacity", "", nil)
-	wrapped = wrapError(capacityError, nil)
-	caperr, ok := wrapped.(cloud.CapacityError)
-	c.Check(ok, check.Equals, true)
-	c.Check(caperr.IsCapacityError(), check.Equals, true)
-	c.Check(caperr.IsInstanceTypeSpecific(), check.Equals, true)
+	for _, trial := range []struct {
+		code string
+		msg  string
+	}{
+		{"InsufficientInstanceCapacity", ""},
+		{"Unsupported", "Your requested instance type (t3.micro) is not supported in your requested Availability Zone (us-east-1e). Please retry your request by not specifying an Availability Zone or choosing us-east-1a, us-east-1b, us-east-1c, us-east-1d, us-east-1f."},
+	} {
+		capacityError := awserr.New(trial.code, trial.msg, nil)
+		wrapped = wrapError(capacityError, nil)
+		caperr, ok := wrapped.(cloud.CapacityError)
+		c.Check(ok, check.Equals, true)
+		c.Check(caperr.IsCapacityError(), check.Equals, true)
+		c.Check(caperr.IsInstanceTypeSpecific(), check.Equals, true)
+	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list