[ARVADOS] created: 1.1.3-144-gc04e0a8
Git user
git at public.curoverse.com
Tue Mar 6 15:01:59 EST 2018
at c04e0a81076b9e0c71bdd4f9b07b7ececf4d2685 (commit)
commit c04e0a81076b9e0c71bdd4f9b07b7ececf4d2685
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Tue Mar 6 14:52:29 2018 -0500
13078: Ensure compute0 always has all the instancetype=X features.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/lib/dispatchcloud/node_size.go b/lib/dispatchcloud/node_size.go
index 41c6ff4..ed33260 100644
--- a/lib/dispatchcloud/node_size.go
+++ b/lib/dispatchcloud/node_size.go
@@ -5,7 +5,6 @@
package dispatchcloud
import (
- "bytes"
"errors"
"log"
"os/exec"
@@ -92,31 +91,27 @@ func SlurmNodeTypeFeatureKludge(cc *arvados.Cluster) {
}
}
-var (
- slurmDummyNode = "compute0"
- slurmErrBadFeature = "Invalid feature"
- slurmErrNoNodes = "node configuration is not available"
-)
+const slurmDummyNode = "compute0"
func slurmKludge(features []string) {
- cmd := exec.Command("srun", "--test-only", "--constraint="+strings.Join(features, "&"), "false")
- out, err := cmd.CombinedOutput()
- switch {
- case err == nil || bytes.Contains(out, []byte(slurmErrNoNodes)):
- // Evidently our node-type feature names are all valid.
+ allFeatures := strings.Join(features, ",")
- case bytes.Contains(out, []byte(slurmErrBadFeature)):
- log.Printf("temporarily configuring node %q with all node type features", slurmDummyNode)
- for _, nodeFeatures := range []string{strings.Join(features, ","), ""} {
- cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+nodeFeatures)
- log.Printf("running: %q %q", cmd.Path, cmd.Args)
- out, err := cmd.CombinedOutput()
- if err != nil {
- log.Printf("error: scontrol: %s (output was %q)", err, out)
- }
- }
+ cmd := exec.Command("sinfo", "--nodes="+slurmDummyNode, "--format=%f", "--noheader")
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ log.Printf("running %q %q: %s (output was %q)", cmd.Path, cmd.Args, err, out)
+ return
+ }
+ if string(out) == allFeatures+"\n" {
+ // Already configured correctly, nothing to do.
+ return
+ }
- default:
- log.Printf("warning: expected srun error %q, %q, or success, but output was %q", slurmErrBadFeature, slurmErrNoNodes, out)
+ log.Printf("configuring node %q with all node type features", slurmDummyNode)
+ cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+allFeatures)
+ log.Printf("running: %q %q", cmd.Path, cmd.Args)
+ out, err = cmd.CombinedOutput()
+ if err != nil {
+ log.Printf("error: scontrol: %s (output was %q)", err, out)
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list