[ARVADOS] created: 2.1.0-2449-g217aba73f
Git user
git at public.arvados.org
Tue May 10 17:55:41 UTC 2022
at 217aba73f6366cf1af30683baa6d0d5d1e3407a9 (commit)
commit 217aba73f6366cf1af30683baa6d0d5d1e3407a9
Author: Tom Clegg <tom at curii.com>
Date: Tue May 10 13:55:20 2022 -0400
18947: Move arvados-dispatch-slurm into arvados-server binary.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index 26705c066..c4415b469 100755
--- a/build/run-build-packages.sh
+++ b/build/run-build-packages.sh
@@ -248,7 +248,7 @@ package_go_binary cmd/arvados-server arvados-git-httpd "$FORMAT" "$ARCH" \
"Provide authenticated http access to Arvados-hosted git repositories"
package_go_binary services/crunch-dispatch-local crunch-dispatch-local "$FORMAT" "$ARCH" \
"Dispatch Crunch containers on the local system"
-package_go_binary services/crunch-dispatch-slurm crunch-dispatch-slurm "$FORMAT" "$ARCH" \
+package_go_binary cmd/arvados-server crunch-dispatch-slurm "$FORMAT" "$ARCH" \
"Dispatch Crunch containers to a SLURM cluster"
package_go_binary cmd/arvados-server crunch-run "$FORMAT" "$ARCH" \
"Supervise a single Crunch container"
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.service b/cmd/arvados-server/crunch-dispatch-slurm.service
similarity index 88%
rename from services/crunch-dispatch-slurm/crunch-dispatch-slurm.service
rename to cmd/arvados-server/crunch-dispatch-slurm.service
index 86830f3a7..51b4e58c3 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.service
+++ b/cmd/arvados-server/crunch-dispatch-slurm.service
@@ -6,18 +6,19 @@
Description=Arvados Crunch Dispatcher for SLURM
Documentation=https://doc.arvados.org/
After=network.target
+AssertPathExists=/etc/arvados/config.yml
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
[Service]
Type=notify
+EnvironmentFile=-/etc/arvados/environment
ExecStart=/usr/bin/crunch-dispatch-slurm
# Set a reasonable default for the open file limit
LimitNOFILE=65536
Restart=always
RestartSec=1
-LimitNOFILE=1000000
# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
StartLimitInterval=0
diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 227a8cf07..25cb26b40 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -32,6 +32,10 @@ h2(#main). development main (as of 2022-04-08)
"previous: Upgrading to 2.4.0":#v2_4_0
+h3. Slurm dispatcher requires configuration update
+
+If you use the Slurm dispatcher (@crunch-dispatch-slurm@) you must add a @Services.DispatchSLURM.InternalURLs@ section to your configuration file, as shown on the "updated install page":{{site.baseurl}}/install/crunch2-slurm/install-dispatch.html.
+
h3. Now recommending Singularity 3.9.9
The compute image "build script":{{site.baseurl}}/install/crunch2-cloud/install-compute-node.html now installs Singularity 3.9.9 instead of 3.7.4. The newer version includes a bugfix that should resolve "intermittent loopback device errors":https://dev.arvados.org/issues/18489 when running containers.
diff --git a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
index 52553a35e..9b664ec9e 100644
--- a/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
+++ b/doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
@@ -26,9 +26,18 @@ This assumes you already have a Slurm cluster, and have set up all of your compu
The Arvados Slurm dispatcher can run on any node that can submit requests to both the Arvados API server and the Slurm controller (via @sbatch@). It is not resource-intensive, so you can run it on the API server node.
-h2(#update-config). Update config.yml (optional)
+h2(#update-config). Update config.yml
-Crunch-dispatch-slurm reads the common configuration file at @config.yml@.
+Crunch-dispatch-slurm reads the common configuration file at @/etc/arvados/config.yml@.
+
+Add a DispatchSLURM entry to the Services section, using the hostname where @crunch-dispatch-slurm@ will run, and an available port:
+
+<notextile>
+<pre> Services:
+ DispatchSLURM:
+ InternalURLs:
+ "http://<code class="userinput">hostname.zzzzz.arvadosapi.com:9007</code>": {}</pre>
+</notextile>
The following configuration parameters are optional.
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index e60880c21..893542df1 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -55,6 +55,9 @@ Clusters:
DispatchLSF:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
+ DispatchSLURM:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: ""
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index f0adcda5f..ace33c9ff 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -348,6 +348,7 @@ type Services struct {
Controller Service
DispatchCloud Service
DispatchLSF Service
+ DispatchSLURM Service
GitHTTP Service
GitSSH Service
Health Service
@@ -631,6 +632,7 @@ const (
ServiceNameController ServiceName = "arvados-controller"
ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
ServiceNameDispatchLSF ServiceName = "arvados-dispatch-lsf"
+ ServiceNameDispatchSLURM ServiceName = "crunch-dispatch-slurm"
ServiceNameGitHTTP ServiceName = "arvados-git-httpd"
ServiceNameHealth ServiceName = "arvados-health"
ServiceNameKeepbalance ServiceName = "keep-balance"
@@ -650,6 +652,7 @@ func (svcs Services) Map() map[ServiceName]Service {
ServiceNameController: svcs.Controller,
ServiceNameDispatchCloud: svcs.DispatchCloud,
ServiceNameDispatchLSF: svcs.DispatchLSF,
+ ServiceNameDispatchSLURM: svcs.DispatchSLURM,
ServiceNameGitHTTP: svcs.GitHTTP,
ServiceNameHealth: svcs.Health,
ServiceNameKeepbalance: svcs.Keepbalance,
diff --git a/sdk/go/health/aggregator_test.go b/sdk/go/health/aggregator_test.go
index f8f7ff9f1..414902089 100644
--- a/sdk/go/health/aggregator_test.go
+++ b/sdk/go/health/aggregator_test.go
@@ -293,6 +293,7 @@ func (s *AggregatorSuite) setAllServiceURLs(listen string) {
&svcs.Controller,
&svcs.DispatchCloud,
&svcs.DispatchLSF,
+ &svcs.DispatchSLURM,
&svcs.GitHTTP,
&svcs.Keepbalance,
&svcs.Keepproxy,
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
index 84105e1fc..c31d79975 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
@@ -2,32 +2,48 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
-
// Dispatcher service for Crunch that submits containers to the slurm queue.
+package dispatchslurm
import (
"context"
- "flag"
"fmt"
"log"
"math"
+ "net/http"
"os"
"regexp"
"strings"
"time"
"git.arvados.org/arvados.git/lib/cmd"
- "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/dispatchcloud"
+ "git.arvados.org/arvados.git/lib/service"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/dispatch"
"github.com/coreos/go-systemd/daemon"
- "github.com/ghodss/yaml"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
+var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchSLURM, newHandler)
+
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
+ logger := ctxlog.FromContext(ctx)
+ disp := &Dispatcher{logger: logger, cluster: cluster}
+ if err := disp.configure(); err != nil {
+ return service.ErrorHandler(ctx, cluster, err)
+ }
+ disp.setup()
+ go func() {
+ disp.err = disp.run()
+ close(disp.done)
+ }()
+ return disp
+}
+
type logger interface {
dispatch.Logger
Fatalf(string, ...interface{})
@@ -35,10 +51,6 @@ type logger interface {
const initialNiceValue int64 = 10000
-var (
- version = "dev"
-)
-
type Dispatcher struct {
*dispatch.Dispatcher
logger logrus.FieldLogger
@@ -46,75 +58,32 @@ type Dispatcher struct {
sqCheck *SqueueChecker
slurm Slurm
+ done chan struct{}
+ err error
+
Client arvados.Client
}
-func main() {
- logger := logrus.StandardLogger()
- if os.Getenv("DEBUG") != "" {
- logger.SetLevel(logrus.DebugLevel)
- }
- logger.Formatter = &logrus.JSONFormatter{
- TimestampFormat: "2006-01-02T15:04:05.000000000Z07:00",
- }
- disp := &Dispatcher{logger: logger}
- err := disp.Run(os.Args[0], os.Args[1:])
- if err != nil {
- logrus.Fatalf("%s", err)
- }
+func (disp *Dispatcher) CheckHealth() error {
+ return disp.err
}
-func (disp *Dispatcher) Run(prog string, args []string) error {
- if err := disp.configure(prog, args); err != nil {
- return err
- }
- disp.setup()
- return disp.run()
+func (disp *Dispatcher) Done() <-chan struct{} {
+ return disp.done
+}
+
+func (disp *Dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ http.NotFound(w, r)
}
-// configure() loads config files. Tests skip this.
-func (disp *Dispatcher) configure(prog string, args []string) error {
+// configure() loads config files. Some tests skip this (see
+// StubbedSuite).
+func (disp *Dispatcher) configure() error {
if disp.logger == nil {
disp.logger = logrus.StandardLogger()
}
- flags := flag.NewFlagSet(prog, flag.ContinueOnError)
- flags.Usage = func() { usage(flags) }
-
- loader := config.NewLoader(nil, disp.logger)
- loader.SetupFlags(flags)
-
- dumpConfig := flag.Bool(
- "dump-config",
- false,
- "write current configuration to stdout and exit")
- getVersion := flags.Bool(
- "version",
- false,
- "Print version information and exit.")
-
- args = loader.MungeLegacyConfigArgs(disp.logger, args, "-legacy-crunch-dispatch-slurm-config")
- if ok, code := cmd.ParseFlags(flags, prog, args, "", os.Stderr); !ok {
- os.Exit(code)
- }
-
- // Print version information if requested
- if *getVersion {
- fmt.Printf("crunch-dispatch-slurm %s\n", version)
- return nil
- }
-
- disp.logger.Printf("crunch-dispatch-slurm %s started", version)
-
- cfg, err := loader.Load()
- if err != nil {
- return err
- }
-
- if disp.cluster, err = cfg.GetCluster(""); err != nil {
- return fmt.Errorf("config error: %s", err)
- }
-
disp.logger = disp.logger.WithField("ClusterID", disp.cluster.ClusterID)
+ disp.logger.Printf("crunch-dispatch-slurm %s started", cmd.Version.String())
disp.Client.APIHost = disp.cluster.Services.Controller.ExternalURL.Host
disp.Client.AuthToken = disp.cluster.SystemRootToken
@@ -137,23 +106,12 @@ func (disp *Dispatcher) configure(prog string, args []string) error {
} else {
disp.logger.Warnf("Client credentials missing from config, so falling back on environment variables (deprecated).")
}
-
- if *dumpConfig {
- out, err := yaml.Marshal(cfg)
- if err != nil {
- return err
- }
- _, err = os.Stdout.Write(out)
- if err != nil {
- return err
- }
- }
-
return nil
}
// setup() initializes private fields after configure().
func (disp *Dispatcher) setup() {
+ disp.done = make(chan struct{})
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
disp.logger.Fatalf("Error making Arvados client: %v", err)
diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
index cf83257da..fb433e65c 100644
--- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
+++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
@@ -2,12 +2,13 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"bytes"
"context"
"errors"
+ "flag"
"fmt"
"io"
"io/ioutil"
@@ -19,10 +20,13 @@ import (
"testing"
"time"
+ "git.arvados.org/arvados.git/lib/cmd"
+ "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/dispatch"
"github.com/sirupsen/logrus"
. "gopkg.in/check.v1"
@@ -387,6 +391,7 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) {
}
func (s *StubbedSuite) TestLoadLegacyConfig(c *C) {
+ log := ctxlog.TestLogger(c)
content := []byte(`
Client:
APIHost: example.com
@@ -402,36 +407,42 @@ ReserveExtraRAM: 12345
MinRetryPeriod: 13s
BatchSize: 99
`)
- tmpfile, err := ioutil.TempFile("", "example")
- if err != nil {
- c.Error(err)
- }
-
- defer os.Remove(tmpfile.Name()) // clean up
-
- if _, err := tmpfile.Write(content); err != nil {
- c.Error(err)
- }
- if err := tmpfile.Close(); err != nil {
- c.Error(err)
+ tmpfile := c.MkDir() + "/config.yml"
+ err := ioutil.WriteFile(tmpfile, content, 0777)
+ c.Assert(err, IsNil)
- }
os.Setenv("ARVADOS_KEEP_SERVICES", "")
- err = s.disp.configure("crunch-dispatch-slurm", []string{"-config", tmpfile.Name()})
- c.Check(err, IsNil)
- c.Check(s.disp.cluster.Services.Controller.ExternalURL, Equals, arvados.URL{Scheme: "https", Host: "example.com", Path: "/"})
- c.Check(s.disp.cluster.SystemRootToken, Equals, "abcdefg")
- c.Check(s.disp.cluster.Containers.SLURM.SbatchArgumentsList, DeepEquals, []string{"--foo", "bar"})
- c.Check(s.disp.cluster.Containers.CloudVMs.PollInterval, Equals, arvados.Duration(12*time.Second))
- c.Check(s.disp.cluster.Containers.SLURM.PrioritySpread, Equals, int64(42))
- c.Check(s.disp.cluster.Containers.CrunchRunCommand, Equals, "x-crunch-run")
- c.Check(s.disp.cluster.Containers.CrunchRunArgumentsList, DeepEquals, []string{"--cgroup-parent-subsystem=memory"})
- c.Check(s.disp.cluster.Containers.ReserveExtraRAM, Equals, arvados.ByteSize(12345))
- c.Check(s.disp.cluster.Containers.MinRetryPeriod, Equals, arvados.Duration(13*time.Second))
- c.Check(s.disp.cluster.API.MaxItemsPerResponse, Equals, 99)
- c.Check(s.disp.cluster.Containers.SLURM.SbatchEnvironmentVariables, DeepEquals, map[string]string{
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(os.Stderr)
+ loader := config.NewLoader(&bytes.Buffer{}, log)
+ loader.SetupFlags(flags)
+ args := loader.MungeLegacyConfigArgs(log, []string{"-config", tmpfile}, "-legacy-"+string(arvados.ServiceNameDispatchSLURM)+"-config")
+ ok, _ := cmd.ParseFlags(flags, "crunch-dispatch-slurm", args, "", os.Stderr)
+ c.Check(ok, Equals, true)
+ cfg, err := loader.Load()
+ c.Assert(err, IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, IsNil)
+
+ c.Check(cluster.Services.Controller.ExternalURL, Equals, arvados.URL{Scheme: "https", Host: "example.com", Path: "/"})
+ c.Check(cluster.SystemRootToken, Equals, "abcdefg")
+ c.Check(cluster.Containers.SLURM.SbatchArgumentsList, DeepEquals, []string{"--foo", "bar"})
+ c.Check(cluster.Containers.CloudVMs.PollInterval, Equals, arvados.Duration(12*time.Second))
+ c.Check(cluster.Containers.SLURM.PrioritySpread, Equals, int64(42))
+ c.Check(cluster.Containers.CrunchRunCommand, Equals, "x-crunch-run")
+ c.Check(cluster.Containers.CrunchRunArgumentsList, DeepEquals, []string{"--cgroup-parent-subsystem=memory"})
+ c.Check(cluster.Containers.ReserveExtraRAM, Equals, arvados.ByteSize(12345))
+ c.Check(cluster.Containers.MinRetryPeriod, Equals, arvados.Duration(13*time.Second))
+ c.Check(cluster.API.MaxItemsPerResponse, Equals, 99)
+ c.Check(cluster.Containers.SLURM.SbatchEnvironmentVariables, DeepEquals, map[string]string{
"ARVADOS_KEEP_SERVICES": "https://example.com/keep1 https://example.com/keep2",
})
+
+ // Ensure configure() copies SbatchEnvironmentVariables into
+ // the current process's environment (that's how they end up
+ // getting passed to sbatch).
+ s.disp.cluster = cluster
+ s.disp.configure()
c.Check(os.Getenv("ARVADOS_KEEP_SERVICES"), Equals, "https://example.com/keep1 https://example.com/keep2")
}
diff --git a/services/crunch-dispatch-slurm/node_type.go b/services/crunch-dispatch-slurm/node_type.go
index d31322f18..738426c92 100644
--- a/services/crunch-dispatch-slurm/node_type.go
+++ b/services/crunch-dispatch-slurm/node_type.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"log"
diff --git a/services/crunch-dispatch-slurm/priority.go b/services/crunch-dispatch-slurm/priority.go
index 2312ce595..515a98d32 100644
--- a/services/crunch-dispatch-slurm/priority.go
+++ b/services/crunch-dispatch-slurm/priority.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
const defaultSpread int64 = 10
diff --git a/services/crunch-dispatch-slurm/priority_test.go b/services/crunch-dispatch-slurm/priority_test.go
index e80984c0f..df1c27def 100644
--- a/services/crunch-dispatch-slurm/priority_test.go
+++ b/services/crunch-dispatch-slurm/priority_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
. "gopkg.in/check.v1"
diff --git a/services/crunch-dispatch-slurm/script.go b/services/crunch-dispatch-slurm/script.go
index f559104d1..fb16e593e 100644
--- a/services/crunch-dispatch-slurm/script.go
+++ b/services/crunch-dispatch-slurm/script.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"strings"
diff --git a/services/crunch-dispatch-slurm/script_test.go b/services/crunch-dispatch-slurm/script_test.go
index a21aeedda..00d70190d 100644
--- a/services/crunch-dispatch-slurm/script_test.go
+++ b/services/crunch-dispatch-slurm/script_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
. "gopkg.in/check.v1"
diff --git a/services/crunch-dispatch-slurm/slurm.go b/services/crunch-dispatch-slurm/slurm.go
index 791f294df..e59826f76 100644
--- a/services/crunch-dispatch-slurm/slurm.go
+++ b/services/crunch-dispatch-slurm/slurm.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"fmt"
diff --git a/services/crunch-dispatch-slurm/squeue.go b/services/crunch-dispatch-slurm/squeue.go
index eae21e62b..d4e41ed1f 100644
--- a/services/crunch-dispatch-slurm/squeue.go
+++ b/services/crunch-dispatch-slurm/squeue.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"bytes"
diff --git a/services/crunch-dispatch-slurm/squeue_test.go b/services/crunch-dispatch-slurm/squeue_test.go
index ce74fe61c..d41e1982b 100644
--- a/services/crunch-dispatch-slurm/squeue_test.go
+++ b/services/crunch-dispatch-slurm/squeue_test.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"time"
diff --git a/services/crunch-dispatch-slurm/usage.go b/services/crunch-dispatch-slurm/usage.go
index 68a2305f7..785843b19 100644
--- a/services/crunch-dispatch-slurm/usage.go
+++ b/services/crunch-dispatch-slurm/usage.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: AGPL-3.0
-package main
+package dispatchslurm
import (
"flag"
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list