[ARVADOS] created: 2.1.0-2449-g072247f9d
Git user
git at public.arvados.org
Tue May 10 15:27:27 UTC 2022
at 072247f9d094c7bcb3b72a91108d23cfa0861517 (commit)
commit 072247f9d094c7bcb3b72a91108d23cfa0861517
Author: Tom Clegg <tom at curii.com>
Date: Tue May 10 11:27:05 2022 -0400
18947: Move arvados-dispatch-slurm into arvados-server binary.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index 26705c066..c4415b469 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -248,7 +248,7 @@ package_go_binary cmd/arvados-server arvados-git-httpd "$FORMAT" "$ARCH" \
"Provide authenticated http access to Arvados-hosted git repositories"
package_go_binary services/crunch-dispatch-local crunch-dispatch-local "$FORMAT" "$ARCH" \
"Dispatch Crunch containers on the local system"
-package_go_binary services/crunch-dispatch-slurm crunch-dispatch-slurm "$FORMAT" "$ARCH" \
+package_go_binary cmd/arvados-server crunch-dispatch-slurm "$FORMAT" "$ARCH" \
"Dispatch Crunch containers to a SLURM cluster"
package_go_binary cmd/arvados-server crunch-run "$FORMAT" "$ARCH" \
"Supervise a single Crunch container"
diff --git a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
index 52553a35e..e55cfcec1 100644
--- a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
@@ -30,6 +30,15 @@ h2(#update-config). Update config.yml (optional)
Crunch-dispatch-slurm reads the common configuration file at @config.yml@.
+Add a DispatchSLURM entry to the Services section, using the hostname where @crunch-dispatch-slurm@ will run, and an available port:
+
+<notextile>
+<pre> Services:
+ DispatchSLURM:
+ InternalURLs:
+ "http://<code class="userinput">hostname.zzzzz.arvadosapi.com:9007</code>": {}</pre>
+</notextile>
+
The following configuration parameters are optional.
h3(#PollPeriod). Containers.PollInterval
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index e60880c21..893542df1 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -55,6 +55,9 @@ Clusters:
DispatchLSF:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
+ DispatchSLURM:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: ""
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index f0adcda5f..ace33c9ff 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -348,6 +348,7 @@ type Services struct {
Controller Service
DispatchCloud Service
DispatchLSF Service
+ DispatchSLURM Service
GitHTTP Service
GitSSH Service
Health Service
@@ -631,6 +632,7 @@ const (
ServiceNameController ServiceName = "arvados-controller"
ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
ServiceNameDispatchLSF ServiceName = "arvados-dispatch-lsf"
+ ServiceNameDispatchSLURM ServiceName = "crunch-dispatch-slurm"
ServiceNameGitHTTP ServiceName = "arvados-git-httpd"
ServiceNameHealth ServiceName = "arvados-health"
ServiceNameKeepbalance ServiceName = "keep-balance"
@@ -650,6 +652,7 @@ func (svcs Services) Map() map[ServiceName]Service {
ServiceNameController: svcs.Controller,
ServiceNameDispatchCloud: svcs.DispatchCloud,
ServiceNameDispatchLSF: svcs.DispatchLSF,
+ ServiceNameDispatchSLURM: svcs.DispatchSLURM,
ServiceNameGitHTTP: svcs.GitHTTP,
ServiceNameHealth: svcs.Health,
ServiceNameKeepbalance: svcs.Keepbalance,
diff --git a/sdk/go/health/aggregator_test.go b/sdk/go/health/aggregator_test.go
index f8f7ff9f1..414902089 100644
--- a/sdk/go/health/aggregator_test.go
+++ b/sdk/go/health/aggregator_test.go
@@ -293,6 +293,7 @@ func (s *AggregatorSuite) setAllServiceURLs(listen string) {
&svcs.Controller,
&svcs.DispatchCloud,
&svcs.DispatchLSF,
+ &svcs.DispatchSLURM,
&svcs.GitHTTP,
&svcs.Keepbalance,
&svcs.Keepproxy,
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 84105e1fc..c31d79975 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -2,32 +2,48 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
-
// Dispatcher service for Crunch that submits containers to the slurm queue.
+package dispatchslurm
import (
"context"
- "flag"
"fmt"
"log"
"math"
+ "net/http"
"os"
"regexp"
"strings"
"time"
"git.arvados.org/arvados.git/lib/cmd"
- "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/dispatchcloud"
+ "git.arvados.org/arvados.git/lib/service"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/dispatch"
"github.com/coreos/go-systemd/daemon"
- "github.com/ghodss/yaml"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
+var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchSLURM, newHandler)
+
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
+ logger := ctxlog.FromContext(ctx)
+ disp := &Dispatcher{logger: logger, cluster: cluster}
+ if err := disp.configure(); err != nil {
+ return service.ErrorHandler(ctx, cluster, err)
+ }
+ disp.setup()
+ go func() {
+ disp.err = disp.run()
+ close(disp.done)
+ }()
+ return disp
+}
+
type logger interface {
dispatch.Logger
Fatalf(string, ...interface{})
@@ -35,10 +51,6 @@ type logger interface {
const initialNiceValue int64 = 10000
-var (
- version = "dev"
-)
-
type Dispatcher struct {
*dispatch.Dispatcher
logger logrus.FieldLogger
@@ -46,75 +58,32 @@ type Dispatcher struct {
sqCheck *SqueueChecker
slurm Slurm
+ done chan struct{}
+ err error
+
Client arvados.Client
}
-func main() {
- logger := logrus.StandardLogger()
- if os.Getenv("DEBUG") != "" {
- logger.SetLevel(logrus.DebugLevel)
- }
- logger.Formatter = &logrus.JSONFormatter{
- TimestampFormat: "2006-01-02T15:04:05.000000000Z07:00",
- }
- disp := &Dispatcher{logger: logger}
- err := disp.Run(os.Args[0], os.Args[1:])
- if err != nil {
- logrus.Fatalf("%s", err)
- }
+func (disp *Dispatcher) CheckHealth() error {
+ return disp.err
}
-func (disp *Dispatcher) Run(prog string, args []string) error {
- if err := disp.configure(prog, args); err != nil {
- return err
- }
- disp.setup()
- return disp.run()
+func (disp *Dispatcher) Done() <-chan struct{} {
+ return disp.done
+}
+
+func (disp *Dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ http.NotFound(w, r)
}
-// configure() loads config files. Tests skip this.
-func (disp *Dispatcher) configure(prog string, args []string) error {
+// configure() loads config files. Some tests skip this (see
+// StubbedSuite).
+func (disp *Dispatcher) configure() error {
if disp.logger == nil {
disp.logger = logrus.StandardLogger()
}
- flags := flag.NewFlagSet(prog, flag.ContinueOnError)
- flags.Usage = func() { usage(flags) }
-
- loader := config.NewLoader(nil, disp.logger)
- loader.SetupFlags(flags)
-
- dumpConfig := flag.Bool(
- "dump-config",
- false,
- "write current configuration to stdout and exit")
- getVersion := flags.Bool(
- "version",
- false,
- "Print version information and exit.")
-
- args = loader.MungeLegacyConfigArgs(disp.logger, args, "-legacy-crunch-dispatch-slurm-config")
- if ok, code := cmd.ParseFlags(flags, prog, args, "", os.Stderr); !ok {
- os.Exit(code)
- }
-
- // Print version information if requested
- if *getVersion {
- fmt.Printf("crunch-dispatch-slurm %s\n", version)
- return nil
- }
-
- disp.logger.Printf("crunch-dispatch-slurm %s started", version)
-
- cfg, err := loader.Load()
- if err != nil {
- return err
- }
-
- if disp.cluster, err = cfg.GetCluster(""); err != nil {
- return fmt.Errorf("config error: %s", err)
- }
-
disp.logger = disp.logger.WithField("ClusterID", disp.cluster.ClusterID)
+ disp.logger.Printf("crunch-dispatch-slurm %s started", cmd.Version.String())
disp.Client.APIHost = disp.cluster.Services.Controller.ExternalURL.Host
disp.Client.AuthToken = disp.cluster.SystemRootToken
@@ -137,23 +106,12 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
} else {
disp.logger.Warnf("Client credentials missing from config, so falling back on environment variables (deprecated).")
}
-
- if *dumpConfig {
- out, err := yaml.Marshal(cfg)
- if err != nil {
- return err
- }
- _, err = os.Stdout.Write(out)
- if err != nil {
- return err
- }
- }
-
return nil
}
// setup() initializes private fields after configure().
func (disp *Dispatcher) setup() {
+ disp.done = make(chan struct{})
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
disp.logger.Fatalf("Error making Arvados client: %v", err)
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index cf83257da..fb433e65c 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -2,12 +2,13 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"bytes"
"context"
"errors"
+ "flag"
"fmt"
"io"
"io/ioutil"
@@ -19,10 +20,13 @@ import (
"testing"
"time"
+ "git.arvados.org/arvados.git/lib/cmd"
+ "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/dispatch"
"github.com/sirupsen/logrus"
. "gopkg.in/check.v1"
@@ -387,6 +391,7 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) {
}
func (s *StubbedSuite) TestLoadLegacyConfig(c *C) {
+ log := ctxlog.TestLogger(c)
content := []byte(`
Client:
APIHost: example.com
@@ -402,36 +407,42 @@ ReserveExtraRAM: 12345
MinRetryPeriod: 13s
BatchSize: 99
`)
- tmpfile, err := ioutil.TempFile("", "example")
- if err != nil {
- c.Error(err)
- }
-
- defer os.Remove(tmpfile.Name()) // clean up
-
- if _, err := tmpfile.Write(content); err != nil {
- c.Error(err)
- }
- if err := tmpfile.Close(); err != nil {
- c.Error(err)
+ tmpfile := c.MkDir() + "/config.yml"
+ err := ioutil.WriteFile(tmpfile, content, 0777)
+ c.Assert(err, IsNil)
- }
os.Setenv("ARVADOS_KEEP_SERVICES", "")
- err = s.disp.configure("crunch-dispatch-slurm", []string{"-config", tmpfile.Name()})
- c.Check(err, IsNil)
- c.Check(s.disp.cluster.Services.Controller.ExternalURL, Equals, arvados.URL{Scheme: "https", Host: "example.com", Path: "/"})
- c.Check(s.disp.cluster.SystemRootToken, Equals, "abcdefg")
- c.Check(s.disp.cluster.Containers.SLURM.SbatchArgumentsList, DeepEquals, []string{"--foo", "bar"})
- c.Check(s.disp.cluster.Containers.CloudVMs.PollInterval, Equals, arvados.Duration(12*time.Second))
- c.Check(s.disp.cluster.Containers.SLURM.PrioritySpread, Equals, int64(42))
- c.Check(s.disp.cluster.Containers.CrunchRunCommand, Equals, "x-crunch-run")
- c.Check(s.disp.cluster.Containers.CrunchRunArgumentsList, DeepEquals, []string{"--cgroup-parent-subsystem=memory"})
- c.Check(s.disp.cluster.Containers.ReserveExtraRAM, Equals, arvados.ByteSize(12345))
- c.Check(s.disp.cluster.Containers.MinRetryPeriod, Equals, arvados.Duration(13*time.Second))
- c.Check(s.disp.cluster.API.MaxItemsPerResponse, Equals, 99)
- c.Check(s.disp.cluster.Containers.SLURM.SbatchEnvironmentVariables, DeepEquals, map[string]string{
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(os.Stderr)
+ loader := config.NewLoader(&bytes.Buffer{}, log)
+ loader.SetupFlags(flags)
+ args := loader.MungeLegacyConfigArgs(log, []string{"-config", tmpfile}, "-legacy-"+string(arvados.ServiceNameDispatchSLURM)+"-config")
+ ok, _ := cmd.ParseFlags(flags, "crunch-dispatch-slurm", args, "", os.Stderr)
+ c.Check(ok, Equals, true)
+ cfg, err := loader.Load()
+ c.Assert(err, IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, IsNil)
+
+ c.Check(cluster.Services.Controller.ExternalURL, Equals, arvados.URL{Scheme: "https", Host: "example.com", Path: "/"})
+ c.Check(cluster.SystemRootToken, Equals, "abcdefg")
+ c.Check(cluster.Containers.SLURM.SbatchArgumentsList, DeepEquals, []string{"--foo", "bar"})
+ c.Check(cluster.Containers.CloudVMs.PollInterval, Equals, arvados.Duration(12*time.Second))
+ c.Check(cluster.Containers.SLURM.PrioritySpread, Equals, int64(42))
+ c.Check(cluster.Containers.CrunchRunCommand, Equals, "x-crunch-run")
+ c.Check(cluster.Containers.CrunchRunArgumentsList, DeepEquals, []string{"--cgroup-parent-subsystem=memory"})
+ c.Check(cluster.Containers.ReserveExtraRAM, Equals, arvados.ByteSize(12345))
+ c.Check(cluster.Containers.MinRetryPeriod, Equals, arvados.Duration(13*time.Second))
+ c.Check(cluster.API.MaxItemsPerResponse, Equals, 99)
+ c.Check(cluster.Containers.SLURM.SbatchEnvironmentVariables, DeepEquals, map[string]string{
"ARVADOS_KEEP_SERVICES": "https://example.com/keep1 https://example.com/keep2",
})
+
+ // Ensure configure() copies SbatchEnvironmentVariables into
+ // the current process's environment (that's how they end up
+ // getting passed to sbatch).
+ s.disp.cluster = cluster
+ s.disp.configure()
c.Check(os.Getenv("ARVADOS_KEEP_SERVICES"), Equals, "https://example.com/keep1 https://example.com/keep2")
}
diff --git a/services/crunch-dispatch-slurm/node_type.go b/services/crunch-dispatch-slurm/node_type.go
index d31322f18..738426c92 100644
--- a/services/crunch-dispatch-slurm/node_type.go
+++ b/services/crunch-dispatch-slurm/node_type.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"log"
diff --git a/services/crunch-dispatch-slurm/priority.go b/services/crunch-dispatch-slurm/priority.go
index 2312ce595..515a98d32 100644
--- a/services/crunch-dispatch-slurm/priority.go
+++ b/services/crunch-dispatch-slurm/priority.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
const defaultSpread int64 = 10
diff --git a/services/crunch-dispatch-slurm/priority_test.go b/services/crunch-dispatch-slurm/priority_test.go
index e80984c0f..df1c27def 100644
--- a/services/crunch-dispatch-slurm/priority_test.go
+++ b/services/crunch-dispatch-slurm/priority_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
. "gopkg.in/check.v1"
diff --git a/services/crunch-dispatch-slurm/script.go b/services/crunch-dispatch-slurm/script.go
index f559104d1..fb16e593e 100644
--- a/services/crunch-dispatch-slurm/script.go
+++ b/services/crunch-dispatch-slurm/script.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"strings"
diff --git a/services/crunch-dispatch-slurm/script_test.go b/services/crunch-dispatch-slurm/script_test.go
index a21aeedda..00d70190d 100644
--- a/services/crunch-dispatch-slurm/script_test.go
+++ b/services/crunch-dispatch-slurm/script_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
. "gopkg.in/check.v1"
diff --git a/services/crunch-dispatch-slurm/slurm.go b/services/crunch-dispatch-slurm/slurm.go
index 791f294df..e59826f76 100644
--- a/services/crunch-dispatch-slurm/slurm.go
+++ b/services/crunch-dispatch-slurm/slurm.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"fmt"
diff --git a/services/crunch-dispatch-slurm/squeue.go b/services/crunch-dispatch-slurm/squeue.go
index eae21e62b..d4e41ed1f 100644
--- a/services/crunch-dispatch-slurm/squeue.go
+++ b/services/crunch-dispatch-slurm/squeue.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"bytes"
diff --git a/services/crunch-dispatch-slurm/squeue_test.go b/services/crunch-dispatch-slurm/squeue_test.go
index ce74fe61c..d41e1982b 100644
--- a/services/crunch-dispatch-slurm/squeue_test.go
+++ b/services/crunch-dispatch-slurm/squeue_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"time"
diff --git a/services/crunch-dispatch-slurm/usage.go b/services/crunch-dispatch-slurm/usage.go
index 68a2305f7..785843b19 100644
--- a/services/crunch-dispatch-slurm/usage.go
+++ b/services/crunch-dispatch-slurm/usage.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"flag"
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list