[arvados] created: 2.1.0-2940-ge71e91157

Tue Oct 4 17:40:44 UTC 2022

at  e71e91157b526302209973f104307b0ef1a2c2cd (commit)


commit e71e91157b526302209973f104307b0ef1a2c2cd
Author: Tom Clegg <tom at curii.com>
Date:   Tue Oct 4 11:30:40 2022 -0400

    19418: LSF: use InstanceTypes to detect unsatisfiable constraints.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
index 37adffd18..ded244046 100644
--- a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid
@@ -62,7 +62,7 @@ Alternatively, you can arrange for the arvados-dispatch-lsf process to run as an
 </notextile>
 
 
-h3(#SbatchArguments). Containers.LSF.BsubArgumentsList
+h3(#BsubArgumentsList). Containers.LSF.BsubArgumentsList
 
 When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command by specifying @BsubArgumentsList@.  You can use this to send the jobs to specific cluster partitions or add resource requests.  Set @BsubArgumentsList@ to an array of strings.
 
@@ -87,7 +87,7 @@ For example:
 
 Note that the default value for @BsubArgumentsList@ uses the @-o@ and @-e@ arguments to write stdout/stderr data to files in @/tmp@ on the compute nodes, which is helpful for troubleshooting installation/configuration problems. Ensure you have something in place to delete old files from @/tmp@, or adjust these arguments accordingly.
 
-h3(#SbatchArguments). Containers.LSF.BsubCUDAArguments
+h3(#BsubCUDAArguments). Containers.LSF.BsubCUDAArguments
 
 If the container requests access to GPUs (@runtime_constraints.cuda.device_count@ of the container request is greater than zero), the command line arguments in @BsubCUDAArguments@ will be added to the command line _after_ @BsubArgumentsList@.  This should consist of the additional @bsub@ flags your site requires to schedule the job on a node with GPU support.  Set @BsubCUDAArguments@ to an array of strings.  For example:
 
@@ -98,7 +98,7 @@ If the container requests access to GPUs (@runtime_constraints.cuda.device_count
 </pre>
 </notextile>
 
-h3(#PollPeriod). Containers.PollInterval
+h3(#PollInterval). Containers.PollInterval
 
 arvados-dispatch-lsf polls the API server periodically for new containers to run.  The @PollInterval@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @s@, @m@, or @h@.  For example:
 
@@ -122,7 +122,7 @@ Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB
 </notextile>
 
 
-h3(#CrunchRunCommand-network). Containers.CrunchRunArgumentList: Using host networking for containers
+h3(#CrunchRunArgumentList). Containers.CrunchRunArgumentList: Using host networking for containers
 
 Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups.  This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net".   If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster.  Be aware this reduces container isolation, which may be a security risk.
 
@@ -134,6 +134,37 @@ Older Linux kernels (prior to 3.18) have bugs in network namespace handling whic
 </pre>
 </notextile>
 
+
+h3(#InstanceTypes). InstanceTypes: Avoid submitting jobs with unsatisfiable resource constraints
+
+LSF does not provide feedback when a submitted job's RAM, CPU, or disk space constraints cannot be satisfied by any node: the job will wait in the queue indefinitely with "pending" status, reported by Arvados as "queued".
+
+As a workaround, you can configure @InstanceTypes@ with your LSF cluster's compute node sizes. Arvados will use these sizes to determine when a container is impossible to run, and cancel it instead of submitting an LSF job.
+
+Apart from detecting non-runnable containers, the configured instance types will not have any effect on scheduling.
+
+<notextile>
+<pre>    InstanceTypes:
+      most-ram:
+        VCPUs: 8
+        RAM: 640GiB
+        IncludedScratch: 640GB
+      most-cpus:
+        VCPUs: 32
+        RAM: 256GiB
+        IncludedScratch: 640GB
+      gpu:
+        VCPUs: 8
+        RAM: 256GiB
+        IncludedScratch: 640GB
+        CUDA:
+          DriverVersion: "11.4"
+          HardwareCapability: "7.5"
+          DeviceCount: 1
+</pre>
+</notextile>
+
+
 {% assign arvados_component = 'arvados-dispatch-lsf' %}
 
 {% include 'install_packages' %}
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index e2348337e..d362f66d1 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -170,6 +170,19 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 	if ctr.State != dispatch.Locked {
 		// already started by prior invocation
 	} else if _, ok := disp.lsfqueue.Lookup(ctr.UUID); !ok {
+		if _, err := dispatchcloud.ChooseInstanceType(disp.Cluster, &ctr); errors.As(err, &dispatchcloud.ConstraintsNotSatisfiableError{}) {
+			err := disp.arvDispatcher.Arv.Update("containers", ctr.UUID, arvadosclient.Dict{
+				"container": map[string]interface{}{
+					"runtime_status": map[string]string{
+						"error": err.Error(),
+					},
+				},
+			}, nil)
+			if err != nil {
+				return fmt.Errorf("error setting runtime_status on %s: %s", ctr.UUID, err)
+			}
+			return disp.arvDispatcher.UpdateState(ctr.UUID, dispatch.Cancelled)
+		}
 		disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
 		cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
 		cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
@@ -184,9 +197,8 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 	defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
 
 	go func(uuid string) {
-		cancelled := false
 		for ctx.Err() == nil {
-			qent, ok := disp.lsfqueue.Lookup(uuid)
+			_, ok := disp.lsfqueue.Lookup(uuid)
 			if !ok {
 				// If the container disappears from
 				// the lsf queue, there is no point in
@@ -196,25 +208,6 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 				cancel()
 				return
 			}
-			if !cancelled && qent.Stat == "PEND" && strings.Contains(qent.PendReason, "There are no suitable hosts for the job") {
-				disp.logger.Printf("container %s: %s", uuid, qent.PendReason)
-				err := disp.arvDispatcher.Arv.Update("containers", uuid, arvadosclient.Dict{
-					"container": map[string]interface{}{
-						"runtime_status": map[string]string{
-							"error": qent.PendReason,
-						},
-					},
-				}, nil)
-				if err != nil {
-					disp.logger.Printf("error setting runtime_status on %s: %s", uuid, err)
-					continue // retry
-				}
-				err = disp.arvDispatcher.UpdateState(uuid, dispatch.Cancelled)
-				if err != nil {
-					continue // retry (UpdateState() already logged the error)
-				}
-				cancelled = true
-			}
 		}
 	}(ctr.UUID)
 
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index a99983f34..e51e71906 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -32,6 +32,7 @@ var _ = check.Suite(&suite{})
 type suite struct {
 	disp          *dispatcher
 	crTooBig      arvados.ContainerRequest
+	crPending     arvados.ContainerRequest
 	crCUDARequest arvados.ContainerRequest
 }
 
@@ -46,6 +47,13 @@ func (s *suite) SetUpTest(c *check.C) {
 	c.Assert(err, check.IsNil)
 	cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second / 4)
 	cluster.Containers.MinRetryPeriod = arvados.Duration(time.Second / 4)
+	cluster.InstanceTypes = arvados.InstanceTypeMap{
+		"biggest_available_node": arvados.InstanceType{
+			RAM:             100 << 30, // 100 GiB
+			VCPUs:           4,
+			IncludedScratch: 100 << 30,
+			Scratch:         100 << 30,
+		}}
 	s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
 	s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
 		return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
@@ -67,6 +75,23 @@ func (s *suite) SetUpTest(c *check.C) {
 	})
 	c.Assert(err, check.IsNil)
 
+	err = arvados.NewClientFromEnv().RequestAndDecode(&s.crPending, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+		"container_request": map[string]interface{}{
+			"runtime_constraints": arvados.RuntimeConstraints{
+				RAM:   100000000,
+				VCPUs: 2,
+			},
+			"container_image":     arvadostest.DockerImage112PDH,
+			"command":             []string{"sleep", "1"},
+			"mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+			"output_path":         "/mnt/out",
+			"state":               arvados.ContainerRequestStateCommitted,
+			"priority":            1,
+			"container_count_max": 1,
+		},
+	})
+	c.Assert(err, check.IsNil)
+
 	err = arvados.NewClientFromEnv().RequestAndDecode(&s.crCUDARequest, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
 		"container_request": map[string]interface{}{
 			"runtime_constraints": arvados.RuntimeConstraints{
@@ -150,15 +175,15 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
 				mtx.Unlock()
-			case s.crTooBig.ContainerUUID:
+			case s.crPending.ContainerUUID:
 				c.Check(args, check.DeepEquals, []string{
-					"-J", s.crTooBig.ContainerUUID,
-					"-n", "1",
-					"-D", "954187MB",
-					"-R", "rusage[mem=954187MB:tmp=256MB] span[hosts=1]",
-					"-R", "select[mem>=954187MB]",
+					"-J", s.crPending.ContainerUUID,
+					"-n", "2",
+					"-D", "608MB",
+					"-R", "rusage[mem=608MB:tmp=256MB] span[hosts=1]",
+					"-R", "select[mem>=608MB]",
 					"-R", "select[tmp>=256MB]",
-					"-R", "select[ncpus>=1]"})
+					"-R", "select[ncpus>=2]"})
 				mtx.Lock()
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
@@ -187,7 +212,7 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 			var records []map[string]interface{}
 			for jobid, uuid := range fakejobq {
 				stat, reason := "RUN", ""
-				if uuid == s.crTooBig.ContainerUUID {
+				if uuid == s.crPending.ContainerUUID {
 					// The real bjobs output includes a trailing ';' here:
 					stat, reason = "PEND", "There are no suitable hosts for the job;"
 				}
@@ -242,23 +267,28 @@ func (s *suite) TestSubmit(c *check.C) {
 			c.Error("timed out")
 			break
 		}
+		// "crTooBig" should never be submitted to lsf because
+		// it is bigger than any configured instance type
+		if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
+			c.Errorf("Lookup(crTooBig) == true, ent = %#v", ent)
+			break
+		}
 		// "queuedcontainer" should be running
 		if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
 			c.Log("Lookup(queuedcontainer) == false")
 			continue
 		}
+		// "crPending" should be pending
+		if ent, ok := s.disp.lsfqueue.Lookup(s.crPending.ContainerUUID); !ok {
+			c.Logf("Lookup(crPending) == false", ent)
+			continue
+		}
 		// "lockedcontainer" should be cancelled because it
 		// has priority 0 (no matching container requests)
 		if ent, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
 			c.Logf("Lookup(lockedcontainer) == true, ent = %#v", ent)
 			continue
 		}
-		// "crTooBig" should be cancelled because lsf stub
-		// reports there is no suitable instance type
-		if ent, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
-			c.Logf("Lookup(crTooBig) == true, ent = %#v", ent)
-			continue
-		}
 		var ctr arvados.Container
 		if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
 			c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
@@ -275,7 +305,7 @@ func (s *suite) TestSubmit(c *check.C) {
 			c.Logf("container %s is not in the LSF queue but its arvados record has not been updated to state==Cancelled (state is %q)", s.crTooBig.ContainerUUID, ctr.State)
 			continue
 		} else {
-			c.Check(ctr.RuntimeStatus["error"], check.Equals, "There are no suitable hosts for the job;")
+			c.Check(ctr.RuntimeStatus["error"], check.Equals, "constraints not satisfiable by any configured instance type")
 		}
 		c.Log("reached desired state")
 		break

-----------------------------------------------------------------------


hooks/post-receive
--